emmintrin.h文件解析


首先分析该文件中最主要的两个类型定义:__m128i 和 __m128d。

/*
 * 128-bit integer vector type.
 *
 * The union overlays eight views of the same storage: signed and unsigned
 * lanes of 8, 16, 32 and 64 bits (16, 8, 4 and 2 lanes respectively), so
 * each array spans the full 128 bits.
 *
 * NOTE(review): _CRT_ALIGN(16) presumably requests 16-byte alignment and
 * __declspec(intrin_type) is an MSVC-specific attribute; neither is
 * defined in this excerpt -- confirm against the compiler headers.
 */
typedef union __declspec(intrin_type) _CRT_ALIGN(16) __m128i {
    __int8              m128i_i8[16];
    __int16             m128i_i16[8];
    __int32             m128i_i32[4];    
    __int64             m128i_i64[2];
    unsigned __int8     m128i_u8[16];
    unsigned __int16    m128i_u16[8];
    unsigned __int32    m128i_u32[4];
    unsigned __int64    m128i_u64[2];
} __m128i;

/*
 * 128-bit double-precision vector type: a struct wrapping two doubles.
 *
 * NOTE(review): _CRT_ALIGN(16) presumably requests 16-byte alignment and
 * __declspec(intrin_type) is an MSVC-specific attribute; neither is
 * defined in this excerpt -- confirm against the compiler headers.
 */
typedef struct __declspec(intrin_type) _CRT_ALIGN(16) __m128d {
    double              m128d_f64[2];
} __m128d;

  /* One sample variable of each vector type, declared so that its layout
     can be inspected in a debugger. */
  __m128i a;

  __m128d c;

编译后在调试器中查看这两个变量,其结构如下:
[图 1:emmintrin.h文件解析(原文此处为一张图片)]


下面开始分析各类运算函数:

双精度浮点算术运算函数:加、减、乘、除、开方、求最大值、求最小值。

/*
 * DP, arithmetic
 *
 * Double-precision arithmetic. Notation: X0 is the low double of X, X1 the
 * high double, r the returned vector. The per-function notes follow the
 * Intel intrinsics documentation: "_sd" (scalar) forms compute only r0 and
 * copy r1 from _A; "_pd" (packed) forms compute both elements.
 */

/* r0 = _A0 + _B0, r1 = _A1 */
extern __m128d _mm_add_sd(__m128d _A, __m128d _B);
/* r0 = _A0 + _B0, r1 = _A1 + _B1 */
extern __m128d _mm_add_pd(__m128d _A, __m128d _B);
/* r0 = _A0 - _B0, r1 = _A1 */
extern __m128d _mm_sub_sd(__m128d _A, __m128d _B);
/* r0 = _A0 - _B0, r1 = _A1 - _B1 */
extern __m128d _mm_sub_pd(__m128d _A, __m128d _B);
/* r0 = _A0 * _B0, r1 = _A1 */
extern __m128d _mm_mul_sd(__m128d _A, __m128d _B);
/* r0 = _A0 * _B0, r1 = _A1 * _B1 */
extern __m128d _mm_mul_pd(__m128d _A, __m128d _B);
/* r0 = sqrt(_B0), r1 = _A1 (note: the root is taken of _B, not _A) */
extern __m128d _mm_sqrt_sd(__m128d _A, __m128d _B);
/* r0 = sqrt(_A0), r1 = sqrt(_A1) */
extern __m128d _mm_sqrt_pd(__m128d _A);
/* r0 = _A0 / _B0, r1 = _A1 */
extern __m128d _mm_div_sd(__m128d _A, __m128d _B);
/* r0 = _A0 / _B0, r1 = _A1 / _B1 */
extern __m128d _mm_div_pd(__m128d _A, __m128d _B);
/* r0 = min(_A0, _B0), r1 = _A1 */
extern __m128d _mm_min_sd(__m128d _A, __m128d _B);
/* r0 = min(_A0, _B0), r1 = min(_A1, _B1) */
extern __m128d _mm_min_pd(__m128d _A, __m128d _B);
/* r0 = max(_A0, _B0), r1 = _A1 */
extern __m128d _mm_max_sd(__m128d _A, __m128d _B);
/* r0 = max(_A0, _B0), r1 = max(_A1, _B1) */
extern __m128d _mm_max_pd(__m128d _A, __m128d _B);
逻辑运算函数:与、与非(andnot,先对 _A 按位取反再与 _B 相与)、或、异或。
/*
 * DP, logicals
 *
 * Bitwise operations applied to both 64-bit elements of the operands
 * (X0 = low element, X1 = high element, r = result).
 */

/* r0 = _A0 & _B0, r1 = _A1 & _B1 */
extern __m128d _mm_and_pd(__m128d _A, __m128d _B);
/* r0 = (~_A0) & _B0, r1 = (~_A1) & _B1 -- only _A is inverted */
extern __m128d _mm_andnot_pd(__m128d _A, __m128d _B);
/* r0 = _A0 | _B0, r1 = _A1 | _B1 */
extern __m128d _mm_or_pd(__m128d _A, __m128d _B);
/* r0 = _A0 ^ _B0, r1 = _A1 ^ _B1 */
extern __m128d _mm_xor_pd(__m128d _A, __m128d _B);


	// Returns an __m128d: r0 = _A0 & _B0, r1 = _A1 & _B1
	extern __m128d _mm_and_pd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = (~_A0) & _B0, r1 = (~_A1) & _B1
	extern __m128d _mm_andnot_pd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = _A0 | _B0, r1 = _A1 | _B1
	extern __m128d _mm_or_pd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = _A0 ^ _B0, r1 = _A1 ^ _B1
	extern __m128d _mm_xor_pd(__m128d _A, __m128d _B);

	// Comparisons: ==, <, <=, >, >=, !=
	// Each result element is an all-ones mask when the comparison holds, else zero.
	// Returns an __m128d: r0 = (_A0 == _B0) ? 0xffffffffffffffff : 0x0, r1 = _A1
	extern __m128d _mm_cmpeq_sd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = (_A0 == _B0) ? 0xffffffffffffffff : 0x0,
	// r1 = (_A1 == _B1) ? 0xffffffffffffffff : 0x0
	extern __m128d _mm_cmpeq_pd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = (_A0 < _B0) ? 0xffffffffffffffff : 0x0, r1 = _A1
	extern __m128d _mm_cmplt_sd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = (_A0 < _B0) ? 0xffffffffffffffff : 0x0,
	// r1 = (_A1 < _B1) ? 0xffffffffffffffff : 0x0
	extern __m128d _mm_cmplt_pd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = (_A0 <= _B0) ? 0xffffffffffffffff : 0x0, r1 = _A1
	extern __m128d _mm_cmple_sd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = (_A0 <= _B0) ? 0xffffffffffffffff : 0x0,
	// r1 = (_A1 <= _B1) ? 0xffffffffffffffff : 0x0
	extern __m128d _mm_cmple_pd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = (_A0 > _B0) ? 0xffffffffffffffff : 0x0, r1 = _A1
	extern __m128d _mm_cmpgt_sd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = (_A0 > _B0) ? 0xffffffffffffffff : 0x0,
	// r1 = (_A1 > _B1) ? 0xffffffffffffffff : 0x0
	extern __m128d _mm_cmpgt_pd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = (_A0 >= _B0) ? 0xffffffffffffffff : 0x0, r1 = _A1
	extern __m128d _mm_cmpge_sd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = (_A0 >= _B0) ? 0xffffffffffffffff : 0x0,
	// r1 = (_A1 >= _B1) ? 0xffffffffffffffff : 0x0
	extern __m128d _mm_cmpge_pd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = (_A0 != _B0) ? 0xffffffffffffffff : 0x0, r1 = _A1
	extern __m128d _mm_cmpneq_sd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = (_A0 != _B0) ? 0xffffffffffffffff : 0x0,
	// r1 = (_A1 != _B1) ? 0xffffffffffffffff : 0x0
	extern __m128d _mm_cmpneq_pd(__m128d _A, __m128d _B);
	// Negated comparisons: the mask is set when the named relation does NOT hold.
	// Returns an __m128d: r0 = !(_A0 < _B0) ? 0xffffffffffffffff : 0x0, r1 = _A1
	extern __m128d _mm_cmpnlt_sd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = !(_A0 < _B0) ? 0xffffffffffffffff : 0x0,
	// r1 = !(_A1 < _B1) ? 0xffffffffffffffff : 0x0
	extern __m128d _mm_cmpnlt_pd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = !(_A0 <= _B0) ? 0xffffffffffffffff : 0x0, r1 = _A1
	extern __m128d _mm_cmpnle_sd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = !(_A0 <= _B0) ? 0xffffffffffffffff : 0x0,
	// r1 = !(_A1 <= _B1) ? 0xffffffffffffffff : 0x0
	extern __m128d _mm_cmpnle_pd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = !(_A0 > _B0) ? 0xffffffffffffffff : 0x0, r1 = _A1
	extern __m128d _mm_cmpngt_sd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = !(_A0 > _B0) ? 0xffffffffffffffff : 0x0,
	// r1 = !(_A1 > _B1) ? 0xffffffffffffffff : 0x0
	extern __m128d _mm_cmpngt_pd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = !(_A0 >= _B0) ? 0xffffffffffffffff : 0x0, r1 = _A1
	extern __m128d _mm_cmpnge_sd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = !(_A0 >= _B0) ? 0xffffffffffffffff : 0x0,
	// r1 = !(_A1 >= _B1) ? 0xffffffffffffffff : 0x0
	extern __m128d _mm_cmpnge_pd(__m128d _A, __m128d _B);
	// Ordered / unordered tests. Per the Intel documentation, "ord" holds when
	// neither operand is NaN and "unord" when at least one operand is NaN.
	// Returns an __m128d: r0 = (_A0 ord _B0) ? 0xffffffffffffffff : 0x0,
	// r1 = (_A1 ord _B1) ? 0xffffffffffffffff : 0x0
	extern __m128d _mm_cmpord_pd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = (_A0 ord _B0) ? 0xffffffffffffffff : 0x0, r1 = _A1
	extern __m128d _mm_cmpord_sd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = (_A0 unord _B0) ? 0xffffffffffffffff : 0x0,
	// r1 = (_A1 unord _B1) ? 0xffffffffffffffff : 0x0
	extern __m128d _mm_cmpunord_pd(__m128d _A, __m128d _B);
	// Returns an __m128d: r0 = (_A0 unord _B0) ? 0xffffffffffffffff : 0x0, r1 = _A1
	extern __m128d _mm_cmpunord_sd(__m128d _A, __m128d _B);
	// Scalar compares of the low doubles that return an int (0 or 1).
	// NOTE(review): the NaN results below are those given in the original
	// notes; documentation versions differ on them -- verify against the
	// compiler actually in use.
	// Returns 0 or 1: r = (_A0 == _B0) ? 0x1 : 0x0; if _A or _B is a NaN, 1 is returned
	extern int _mm_comieq_sd(__m128d _A, __m128d _B);
	// Returns 0 or 1: r = (_A0 < _B0) ? 0x1 : 0x0; if _A or _B is a NaN, 1 is returned
	extern int _mm_comilt_sd(__m128d _A, __m128d _B);
	// Returns 0 or 1: r = (_A0 <= _B0) ? 0x1 : 0x0; if _A or _B is a NaN, 1 is returned
	extern int _mm_comile_sd(__m128d _A, __m128d _B);
	// Returns 0 or 1: r = (_A0 > _B0) ? 0x1 : 0x0; if _A or _B is a NaN, 0 is returned
	extern int _mm_comigt_sd(__m128d _A, __m128d _B);
	// Returns 0 or 1: r = (_A0 >= _B0) ? 0x1 : 0x0; if _A or _B is a NaN, 0 is returned
	extern int _mm_comige_sd(__m128d _A, __m128d _B);
	// Returns 0 or 1: r = (_A0 != _B0) ? 0x1 : 0x0; if _A or _B is a NaN, 0 is returned
	extern int _mm_comineq_sd(__m128d _A, __m128d _B);
	// "Unordered" scalar compares of the low doubles that return an int (0 or 1).
	// NOTE(review): the NaN results below are those given in the original
	// notes; documentation versions differ on them -- verify against the
	// compiler actually in use. Presumably these differ from the _mm_comi*_sd
	// forms only in when a quiet NaN raises an invalid-operation exception.
	// Returns 0 or 1: r = (_A0 == _B0) ? 0x1 : 0x0; if _A or _B is a NaN, 1 is returned
	extern int _mm_ucomieq_sd(__m128d _A, __m128d _B);
	// Returns 0 or 1: r = (_A0 < _B0) ? 0x1 : 0x0; if _A or _B is a NaN, 1 is returned
	extern int _mm_ucomilt_sd(__m128d _A, __m128d _B);
	// Returns 0 or 1: r = (_A0 <= _B0) ? 0x1 : 0x0; if _A or _B is a NaN, 1 is returned
	extern int _mm_ucomile_sd(__m128d _A, __m128d _B);
	// Returns 0 or 1: r = (_A0 > _B0) ? 0x1 : 0x0; if _A or _B is a NaN, 0 is returned
	extern int _mm_ucomigt_sd(__m128d _A, __m128d _B);
	// Returns 0 or 1: r = (_A0 >= _B0) ? 0x1 : 0x0; if _A or _B is a NaN, 0 is returned
	extern int _mm_ucomige_sd(__m128d _A, __m128d _B);
	// Returns 0 or 1: r = (_A0 != _B0) ? 0x1 : 0x0; if _A or _B is a NaN, 0 is returned
	extern int _mm_ucomineq_sd(__m128d _A, __m128d _B);

// Conversion operations
// Variants with an extra "t" (cvtt...) truncate toward zero instead of
// using the current rounding mode.
	// Returns an __m128d: r0 = (double)_A0, r1 = (double)_A1
	extern __m128d _mm_cvtepi32_pd(__m128i _A);
	// Returns an __m128i: r0 = (int)_A0, r1 = (int)_A1, r2 = 0x0, r3 = 0x0
	extern __m128i _mm_cvtpd_epi32(__m128d _A);
	// Returns an __m128i: r0 = (int)_A0, r1 = (int)_A1, r2 = 0x0, r3 = 0x0, with truncation
	extern __m128i _mm_cvttpd_epi32(__m128d _A);
	// Returns an __m128: r0 = (float)_A0, r1 = (float)_A1, r2 = (float)_A2, r3 = (float)_A3
	extern __m128 _mm_cvtepi32_ps(__m128i _A);
	// Returns an __m128i: r0 = (int)_A0, r1 = (int)_A1, r2 = (int)_A2, r3 = (int)_A3
	extern __m128i _mm_cvtps_epi32(__m128 _A);
	// Returns an __m128i: r0 = (int)_A0, r1 = (int)_A1, r2 = (int)_A2, r3 = (int)_A3, with truncation
	extern __m128i _mm_cvttps_epi32(__m128 _A);
	// Returns an __m128: r0 = (float)_A0, r1 = (float)_A1, r2 = 0.0, r3 = 0.0
	extern __m128 _mm_cvtpd_ps(__m128d _A);
	// Returns an __m128d: r0 = (double)_A0, r1 = (double)_A1
	extern __m128d _mm_cvtps_pd(__m128 _A);
	// Returns an __m128: r0 = (float)_B0, r1 = _A1, r2 = _A2, r3 = _A3
	// (the upper three elements are passed through from _A, not _B)
	extern __m128 _mm_cvtsd_ss(__m128 _A, __m128d _B);
	// Returns an __m128d: r0 = (double)_B0, r1 = _A1
	extern __m128d _mm_cvtss_sd(__m128d _A, __m128 _B);
	// Returns a 32-bit integer: r = (int)_A0
	extern int _mm_cvtsd_si32(__m128d _A);
	// Returns a 32-bit integer: r = (int)_A0, with truncation
	extern int _mm_cvttsd_si32(__m128d _A);
	// Returns an __m128d: r0 = (double)_B, r1 = _A1
	extern __m128d _mm_cvtsi32_sd(__m128d _A, int _B);
	// Returns an __m64: r0 = (int)_A0, r1 = (int)_A1
	extern __m64 _mm_cvtpd_pi32(__m128d _A);
	// Returns an __m64: r0 = (int)_A0, r1 = (int)_A1, with truncation
	extern __m64 _mm_cvttpd_pi32(__m128d _A);
	// Returns an __m128d: r0 = (double)_A0, r1 = (double)_A1
	extern __m128d _mm_cvtpi32_pd(__m64 _A);


/*
 * Integer, misc
 *
 * Pack, extract/insert, mask, shuffle and interleave operations on the
 * integer lanes of __m128i. Descriptions follow the Intel intrinsics
 * documentation.
 */

/* Narrow the 16-bit lanes of _A (low half of result) and _B (high half)
   to 8 bits with signed saturation. */
extern __m128i _mm_packs_epi16(__m128i _A, __m128i _B);
/* Narrow the 32-bit lanes of _A and _B to 16 bits with signed saturation. */
extern __m128i _mm_packs_epi32(__m128i _A, __m128i _B);
/* Narrow the signed 16-bit lanes of _A and _B to 8 bits with unsigned saturation. */
extern __m128i _mm_packus_epi16(__m128i _A, __m128i _B);
/* Return the 16-bit lane of _A selected by _Imm, zero-extended. */
extern int _mm_extract_epi16(__m128i _A, int _Imm);
/* Return _A with the 16-bit lane selected by _Imm replaced by the low 16 bits of _B. */
extern __m128i _mm_insert_epi16(__m128i _A, int _B, int _Imm);
/* Build a 16-bit mask from the most significant bit of each byte of _A. */
extern int _mm_movemask_epi8(__m128i _A);
/* Permute the four 32-bit lanes of _A as selected by _Imm. */
extern __m128i _mm_shuffle_epi32(__m128i _A, int _Imm);
/* Permute the upper four 16-bit lanes of _A by _Imm; the lower 64 bits are copied. */
extern __m128i _mm_shufflehi_epi16(__m128i _A, int _Imm);
/* Permute the lower four 16-bit lanes of _A by _Imm; the upper 64 bits are copied. */
extern __m128i _mm_shufflelo_epi16(__m128i _A, int _Imm);
/* unpackhi_*: interleave the lanes from the upper 64 bits of _A and _B. */
extern __m128i _mm_unpackhi_epi8(__m128i _A, __m128i _B);
extern __m128i _mm_unpackhi_epi16(__m128i _A, __m128i _B);
extern __m128i _mm_unpackhi_epi32(__m128i _A, __m128i _B);
extern __m128i _mm_unpackhi_epi64(__m128i _A, __m128i _B);
/* unpacklo_*: interleave the lanes from the lower 64 bits of _A and _B. */
extern __m128i _mm_unpacklo_epi8(__m128i _A, __m128i _B);
extern __m128i _mm_unpacklo_epi16(__m128i _A, __m128i _B);
extern __m128i _mm_unpacklo_epi32(__m128i _A, __m128i _B);
extern __m128i _mm_unpacklo_epi64(__m128i _A, __m128i _B);

/*
 * Integer, loads
 *
 * Descriptions follow the Intel intrinsics documentation.
 */

/* Load 128 bits from _P; _P must be 16-byte aligned. */
extern __m128i _mm_load_si128(__m128i const*_P);
/* Load 128 bits from _P; no alignment requirement. */
extern __m128i _mm_loadu_si128(__m128i const*_P);
/* Load 64 bits from _P into the low half of the result; the high half is zeroed. */
extern __m128i _mm_loadl_epi64(__m128i const*_P);

/*
 * Integer, sets
 *
 * Build an __m128i from scalar arguments. Per the Intel intrinsics
 * documentation:
 *   - _mm_set_*  : arguments are listed from the highest lane to the
 *                  lowest (the last argument becomes lane 0).
 *   - _mm_set1_* : the single argument is broadcast to every lane.
 *   - _mm_setr_* : "reversed" order; the first argument becomes lane 0.
 */

extern __m128i _mm_set_epi64(__m64 _Q1, __m64 _Q0);
extern __m128i _mm_set_epi32(int _I3, int _I2, int _I1, int _I0);
extern __m128i _mm_set_epi16(short _W7, short _W6, short _W5, short _W4,
                             short _W3, short _W2, short _W1, short _W0);
extern __m128i _mm_set_epi8(char _B15, char _B14, char _B13, char _B12, 
                            char _B11, char _B10, char _B9, char _B8, 
                            char _B7, char _B6, char _B5, char _B4, 
                            char _B3, char _B2, char _B1, char _B0);
extern __m128i _mm_set1_epi64(__m64 _Q);
extern __m128i _mm_set1_epi32(int _I);
extern __m128i _mm_set1_epi16(short _W);
extern __m128i _mm_set1_epi8(char _B);
/* NOTE(review): takes an __m128i rather than a scalar; presumably keeps the
   low 64 bits of _Q and zeroes the high 64 bits -- confirm. */
extern __m128i _mm_setl_epi64(__m128i _Q);
extern __m128i _mm_setr_epi64(__m64 _Q0, __m64 _Q1);
extern __m128i _mm_setr_epi32(int _I0, int _I1, int _I2, int _I3);
extern __m128i _mm_setr_epi16(short _W0, short _W1, short _W2, short _W3, 
                              short _W4, short _W5, short _W6, short _W7);
/* NOTE(review): unlike the other _mm_setr_* prototypes, the parameters here
   are named _B15.._B0; with setr ordering the first argument is presumably
   still the one stored in the lowest byte -- confirm. */
extern __m128i _mm_setr_epi8(char _B15, char _B14, char _B13, char _B12, 
                             char _B11, char _B10, char _B9, char _B8, 
                             char _B7, char _B6, char _B5, char _B4, 
                             char _B3, char _B2, char _B1, char _B0);
/* Return a vector with all 128 bits cleared. */
extern __m128i _mm_setzero_si128(void);

/*
 * Integer, stores
 *
 * Descriptions follow the Intel intrinsics documentation.
 */

/* Store the 128 bits of _B to _P; _P must be 16-byte aligned. */
extern void _mm_store_si128(__m128i *_P, __m128i _B);
/* Store the 128 bits of _B to _P; no alignment requirement. */
extern void _mm_storeu_si128(__m128i *_P, __m128i _B);
/* Store the low 64 bits of _Q to _P. */
extern void _mm_storel_epi64(__m128i *_P, __m128i _Q);
/* Store to _P only those bytes of _D whose corresponding byte in _N has its
   most significant bit set. */
extern void _mm_maskmoveu_si128(__m128i _D, __m128i _N, char *_P);

/*
 * Integer, moves
 *
 * Descriptions follow the Intel intrinsics documentation.
 */

/* Copy the low 64 bits of _Q to the result; the high 64 bits are zeroed. */
extern __m128i _mm_move_epi64(__m128i _Q);
/* Place the 64-bit value _Q in the low half of the result; the high half is zeroed. */
extern __m128i _mm_movpi64_epi64(__m64 _Q);
/* Return the low 64 bits of _Q as an __m64. */
extern __m64 _mm_movepi64_pi64(__m128i _Q);


/*
 * Cacheability support
 *
 * Non-temporal ("streaming") stores, cache-line flush, memory fences and
 * the spin-wait hint. Descriptions follow the Intel intrinsics
 * documentation.
 */

/* Store the two doubles of _A to _Dp with a non-temporal hint; _Dp must be 16-byte aligned. */
extern void _mm_stream_pd(double *_Dp, __m128d _A);
/* Store the 128 bits of _A to _P with a non-temporal hint; _P must be 16-byte aligned. */
extern void _mm_stream_si128(__m128i *_P, __m128i _A);
/* Flush the cache line containing _P from all cache levels. */
extern void _mm_clflush(void const*_P);
/* Load fence: orders earlier loads before later loads. */
extern void _mm_lfence(void);
/* Full memory fence: orders earlier loads and stores before later ones. */
extern void _mm_mfence(void);
/* Store the 32-bit integer _I to _P with a non-temporal hint. */
extern void _mm_stream_si32(int *_P, int _I);
/* Hint to the processor that the caller is in a spin-wait loop. */
extern void _mm_pause(void);

/*
 * New convert to float
 *
 * Returns a scalar double taken from an __m128d; per the Intel intrinsics
 * documentation this is the low element (_A0).
 */

extern double _mm_cvtsd_f64(__m128d _A);

/*
 * Support for casting between various SP, DP, INT vector types.
 * Note that these do no conversion of values, they just change
 * the type.
 *
 * Naming: _mm_cast<from>_<to>, where ps = __m128 (single precision),
 * pd = __m128d (double precision) and si128 = __m128i (integer), as the
 * parameter and return types of each prototype show.
 */

extern __m128  _mm_castpd_ps(__m128d);
extern __m128i _mm_castpd_si128(__m128d);
extern __m128d _mm_castps_pd(__m128);
extern __m128i _mm_castps_si128(__m128);
extern __m128  _mm_castsi128_ps(__m128i);
extern __m128d _mm_castsi128_pd(__m128i);

/*
 * Support for 64-bit extension intrinsics
 *
 * These prototypes are declared only when _M_X64 is defined (MSVC's macro
 * for x64 targets). Descriptions follow the Intel intrinsics
 * documentation; the parameters are unnamed in the header.
 */

#if defined(_M_X64)
/* Convert the low double to a 64-bit integer using the current rounding mode. */
extern __int64 _mm_cvtsd_si64(__m128d);
/* Convert the low double to a 64-bit integer, truncating toward zero. */
extern __int64 _mm_cvttsd_si64(__m128d);
/* r0 = (double)<second argument>, r1 = high double of the first argument. */
extern __m128d _mm_cvtsi64_sd(__m128d, __int64);
/* Place the 64-bit integer in the low half of the result; the high half is zeroed. */
extern __m128i _mm_cvtsi64_si128(__int64);
/* Return the low 64 bits of the vector as an integer. */
extern __int64 _mm_cvtsi128_si64(__m128i);
/* Alternate intrinsic name definitions */
#define _mm_stream_si64 _mm_stream_si64x
#endif /* defined(_M_X64) */

小结:SIMD相关头文件介绍。


你可能感兴趣的:(SIMD,sse)