Volume 2B Instruction Set Reference N-Z (794102), страница 81
Текст из файла (страница 81)
2B C-9INTEL® C/C++ COMPILER INTRINSICS AND FUNCTIONAL EQUIVALENTSTable C-1. Simple Intrinsics (Contd.)MnemonicPMADDUBSWIntrinsic__m64 _mm_maddubs_pi16 (__m64 a, __m64 b)__m128i _mm_maddubs_epi16 (__m128i a, __m128i b)PMADDWD__m128i _mm_madd_epi16(__m128i m1, __m128i m2)PMADDWD__m64 _mm_madd_pi16(__m64 m1, __m64 m2)PMAXSW__m128i _mm_max_epi16(__m128i a, __m128i b)PMAXSW__m64 _mm_max_pi16(__m64 a, __m64 b)PMAXUB__m128i _mm_max_epu8(__m128i a, __m128i b)PMAXUB__m64 _mm_max_pu8(__m64 a, __m64 b)PMINSW__m128i _mm_min_epi16(__m128i a, __m128i b)PMINSW__m64 _mm_min_pi16(__m64 a, __m64 b)PMINUB__m128i _mm_min_epu8(__m128i a, __m128i b)PMINUB__m64 _mm_min_pu8(__m64 a, __m64 b)PMOVMSKBint _mm_movemask_epi8(__m128i a)PMOVMSKBint _mm_movemask_pi8(__m64 a)PMULHRSW__m64 _mm_mulhrs_pi16 (__m64 a, __m64 b)__m128i _mm_mulhrs_epi16 (__m128i a, __m128i b)PMULHUW__m128i _mm_mulhi_epu16(__m128i a, __m128i b)PMULHUW__m64 _mm_mulhi_pu16(__m64 a, __m64 b)PMULHW__m128i _mm_mulhi_epi16(__m128i m1, __m128i m2)PMULHW__m64 _mm_mulhi_pi16(__m64 m1, __m64 m2)PMULLW__m128i _mm_mullo_epi16(__m128i m1, __m128i m2)PMULLW__m64 _mm_mullo_pi16(__m64 m1, __m64 m2)PMULUDQ__m64 _mm_mul_su32(__m64 m1, __m64 m2)__m128i _mm_mul_epu32(__m128i m1, __m128i m2)POR__m64 _mm_or_si64(__m64 m1, __m64 m2)POR__m128i _mm_or_si128(__m128i m1, __m128i m2)PREFETCHhvoid _mm_prefetch(char *a, int sel)PSADBW__m128i _mm_sad_epu8(__m128i a, __m128i b)PSADBW__m64 _mm_sad_pu8(__m64 a, __m64 b)PSHUFB__m64 _mm_shuffle_pi8 (__m64 a, __m64 b)PSHUFD__m128i _mm_shuffle_epi32(__m128i a, int n)__m128i _mm_shuffle_epi8 (__m128i a, __m128i b)PSHUFHW__m128i _mm_shufflehi_epi16(__m128i a, int n)PSHUFLW__m128i _mm_shufflelo_epi16(__m128i a, int n)PSHUFW__m64 _mm_shuffle_pi16(__m64 a, int n)C-10 Vol.
2BINTEL® C/C++ COMPILER INTRINSICS AND FUNCTIONAL EQUIVALENTSTable C-1. Simple Intrinsics (Contd.)MnemonicPSIGNBIntrinsic__m64 _mm_sign_pi8 (__m64 a, __m64 b)__m128i _mm_sign_epi8 (__m128i a, __m128i b)PSIGND__m64 _mm_sign_pi32 (__m64 a, __m64 b)__m128i _mm_sign_epi32 (__m128i a, __m128i b)PSIGNW__m64 _mm_sign_pi16 (__m64 a, __m64 b)__m128i _mm_sign_epi16 (__m128i a, __m128i b)PSLLW__m128i _mm_sll_epi16(__m128i m, __m128i count)PSLLW__m128i _mm_slli_epi16(__m128i m, int count)PSLLW__m64 _mm_sll_pi16(__m64 m, __m64 count)__m64 _mm_slli_pi16(__m64 m, int count)PSLLD__m128i _mm_slli_epi32(__m128i m, int count)__m128i _mm_sll_epi32(__m128i m, __m128i count)PSLLD__m64 _mm_slli_pi32(__m64 m, int count)__m64 _mm_sll_pi32(__m64 m, __m64 count)PSLLQ__m64 _mm_sll_si64(__m64 m, __m64 count)__m64 _mm_slli_si64(__m64 m, int count)PSLLQ__m128i _mm_sll_epi64(__m128i m, __m128i count)__m128i _mm_slli_epi64(__m128i m, int count)PSLLDQ__m128i _mm_slli_si128(__m128i m, int imm)PSRAW__m128i _mm_sra_epi16(__m128i m, __m128i count)PSRAW__m64 _mm_sra_pi16(__m64 m, __m64 count)PSRAD__m128i _mm_sra_epi32 (__m128i m, __m128i count)PSRAD__m64 _mm_sra_pi32 (__m64 m, __m64 count)PSRLW__m128i _mm_srl_epi16 (__m128i m, __m128i count)__m128i _mm_srai_epi16(__m128i m, int count)__m64 _mm_srai_pi16(__m64 m, int count)__m128i _mm_srai_epi32 (__m128i m, int count)__m64 _mm_srai_pi32 (__m64 m, int count)__m128i _mm_srli_epi16 (__m128i m, int count)__m64 _mm_srl_pi16 (__m64 m, __m64 count)__m64 _mm_srli_pi16(__m64 m, int count)PSRLD__m128i _mm_srl_epi32 (__m128i m, __m128i count)PSRLD__m64 _mm_srl_pi32 (__m64 m, __m64 count)__m128i _mm_srli_epi32 (__m128i m, int count)__m64 _mm_srli_pi32 (__m64 m, int count)Vol.
2B C-11INTEL® C/C++ COMPILER INTRINSICS AND FUNCTIONAL EQUIVALENTSTable C-1. Simple Intrinsics (Contd.)MnemonicPSRLQIntrinsic__m128i _mm_srl_epi64 (__m128i m, __m128i count)__m128i _mm_srli_epi64 (__m128i m, int count)PSRLQ__m64 _mm_srl_si64 (__m64 m, __m64 count)__m64 _mm_srli_si64 (__m64 m, int count)PSRLDQ__m128i _mm_srli_si128(__m128i m, int imm)PSUBB__m128i _mm_sub_epi8(__m128i m1, __m128i m2)PSUBB__m64 _mm_sub_pi8(__m64 m1, __m64 m2)PSUBW__m128i _mm_sub_epi16(__m128i m1, __m128i m2)PSUBW__m64 _mm_sub_pi16(__m64 m1, __m64 m2)PSUBD__m128i _mm_sub_epi32(__m128i m1, __m128i m2)PSUBD__m64 _mm_sub_pi32(__m64 m1, __m64 m2)PSUBQ__m128i _mm_sub_epi64(__m128i m1, __m128i m2)PSUBQ__m64 _mm_sub_si64(__m64 m1, __m64 m2)PSUBSB__m128i _mm_subs_epi8(__m128i m1, __m128i m2)PSUBSB__m64 _mm_subs_pi8(__m64 m1, __m64 m2)PSUBSW__m128i _mm_subs_epi16(__m128i m1, __m128i m2)PSUBSW__m64 _mm_subs_pi16(__m64 m1, __m64 m2)PSUBUSB__m128i _mm_subs_epu8(__m128i m1, __m128i m2)PSUBUSB__m64 _mm_subs_pu8(__m64 m1, __m64 m2)PSUBUSW__m128i _mm_subs_epu16(__m128i m1, __m128i m2)PSUBUSW__m64 _mm_subs_pu16(__m64 m1, __m64 m2)PUNPCKHBW__m64 _mm_unpackhi_pi8(__m64 m1, __m64 m2)PUNPCKHBW__m128i _mm_unpackhi_epi8(__m128i m1, __m128i m2)PUNPCKHWD__m64 _mm_unpackhi_pi16(__m64 m1,__m64 m2)PUNPCKHWD__m128i _mm_unpackhi_epi16(__m128i m1, __m128i m2)PUNPCKHDQ__m64 _mm_unpackhi_pi32(__m64 m1, __m64 m2)PUNPCKHDQ__m128i _mm_unpackhi_epi32(__m128i m1, __m128i m2)PUNPCKHQDQ__m128i _mm_unpackhi_epi64(__m128i m1, __m128i m2)PUNPCKLBW__m64 _mm_unpacklo_pi8 (__m64 m1, __m64 m2)PUNPCKLBW__m128i _mm_unpacklo_epi8 (__m128i m1, __m128i m2)PUNPCKLWD__m64 _mm_unpacklo_pi16(__m64 m1, __m64 m2)PUNPCKLWD__m128i _mm_unpacklo_epi16(__m128i m1, __m128i m2)PUNPCKLDQ__m64 _mm_unpacklo_pi32(__m64 m1, __m64 m2)PUNPCKLDQ__m128i _mm_unpacklo_epi32(__m128i m1, __m128i m2)PUNPCKLQDQ__m128i _mm_unpacklo_epi64(__m128i m1, __m128i m2)C-12 Vol.
2BINTEL® C/C++ COMPILER INTRINSICS AND FUNCTIONAL EQUIVALENTSTable C-1. Simple Intrinsics (Contd.)MnemonicIntrinsicPXOR__m64 _mm_xor_si64(__m64 m1, __m64 m2)PXOR__m128i _mm_xor_si128(__m128i m1, __m128i m2)RCPPS__m128 _mm_rcp_ps(__m128 a)RCPSS__m128 _mm_rcp_ss(__m128 a)RSQRTPS__m128 _mm_rsqrt_ps(__m128 a)RSQRTSS__m128 _mm_rsqrt_ss(__m128 a)SFENCEvoid _mm_sfence(void)SHUFPD__m128d _mm_shuffle_pd(__m128d a, __m128d b, unsigned int imm8)SHUFPS__m128 _mm_shuffle_ps(__m128 a, __m128 b, unsigned int imm8)SQRTPD__m128d _mm_sqrt_pd(__m128d a)SQRTPS__m128 _mm_sqrt_ps(__m128 a)SQRTSD__m128d _mm_sqrt_sd(__m128d a)SQRTSS__m128 _mm_sqrt_ss(__m128 a)STMXCSRunsigned int _mm_getcsr(void)SUBPD__m128d _mm_sub_pd(__m128d a, __m128d b)SUBPS__m128 _mm_sub_ps(__m128 a, __m128 b)SUBSD__m128d _mm_sub_sd(__m128d a, __m128d b)SUBSS__m128 _mm_sub_ss(__m128 a, __m128 b)UCOMISDint _mm_ucomieq_sd(__m128d a, __m128d b)int _mm_ucomilt_sd(__m128d a, __m128d b)int _mm_ucomile_sd(__m128d a, __m128d b)int _mm_ucomigt_sd(__m128d a, __m128d b)int _mm_ucomige_sd(__m128d a, __m128d b)int _mm_ucomineq_sd(__m128d a, __m128d b)UCOMISSint _mm_ucomieq_ss(__m128 a, __m128 b)int _mm_ucomilt_ss(__m128 a, __m128 b)int _mm_ucomile_ss(__m128 a, __m128 b)int _mm_ucomigt_ss(__m128 a, __m128 b)int _mm_ucomige_ss(__m128 a, __m128 b)int _mm_ucomineq_ss(__m128 a, __m128 b)UNPCKHPD__m128d _mm_unpackhi_pd(__m128d a, __m128d b)UNPCKHPS__m128 _mm_unpackhi_ps(__m128 a, __m128 b)UNPCKLPD__m128d _mm_unpacklo_pd(__m128d a, __m128d b)UNPCKLPS__m128 _mm_unpacklo_ps(__m128 a, __m128 b)Vol.
2B C-13INTEL® C/C++ COMPILER INTRINSICS AND FUNCTIONAL EQUIVALENTSTable C-1. Simple Intrinsics (Contd.)MnemonicIntrinsicXORPD__m128d _mm_xor_pd(__m128d a, __m128d b)XORPS__m128 _mm_xor_ps(__m128 a, __m128 b)C.2 COMPOSITE INTRINSICSTable C-2. Composite IntrinsicsMnemonicIntrinsic(composite)__m128i _mm_set_epi64(__m64 q1, __m64 q0)(composite)__m128i _mm_set_epi32(int i3, int i2, int i1, int i0)(composite)__m128i _mm_set_epi16(short w7,short w6, short w5, short w4, short w3, short w2,short w1,short w0)(composite)__m128i _mm_set_epi8(char w15,char w14, char w13, char w12, char w11, char w10,char w9, char w8, char w7,char w6, char w5, char w4, char w3, char w2,char w1, char w0)(composite)__m128i _mm_set1_epi64(__m64 q)(composite)__m128i _mm_set1_epi32(int a)(composite)__m128i _mm_set1_epi16(short a)(composite)__m128i _mm_set1_epi8(char a)(composite)__m128i _mm_setr_epi64(__m64 q1, __m64 q0)(composite)__m128i _mm_setr_epi32(int i3, int i2, int i1, int i0)(composite)__m128i _mm_setr_epi16(short w7,short w6, short w5, short w4, short w3, short w2, short w1,short w0)(composite)__m128i _mm_setr_epi8(char w15,char w14, char w13, char w12, char w11, char w10,char w9, char w8,char w7, char w6,char w5, char w4, char w3, char w2,char w1,char w0)(composite)__m128i _mm_setzero_si128()(composite)__m128 _mm_set_ps1(float w)__m128 _mm_set1_ps(float w)(composite)__m128d _mm_set1_pd(double w)(composite)__m128d _mm_set_sd(double w)(composite)__m128d _mm_set_pd(double z, double y)(composite)__m128 _mm_set_ps(float z, float y, float x, float w)(composite)__m128d _mm_setr_pd(double z, double y)(composite)__m128 _mm_setr_ps(float z, float y, float x, float w)(composite)__m128d _mm_setzero_pd(void)(composite)__m128 _mm_setzero_ps(void)MOVSD +shuffle__m128d _mm_load_pd(double * p)__m128d _mm_load1_pd(double *p)C-14 Vol.
2BINTEL® C/C++ COMPILER INTRINSICS AND FUNCTIONAL EQUIVALENTSTable C-2. Composite Intrinsics (Contd.)MnemonicIntrinsicMOVSS +shuffle__m128 _mm_load_ps1(float * p)__m128 _mm_load1_ps(float *p)MOVAPD +shuffle__m128d _mm_loadr_pd(double * p)MOVAPS +shuffle__m128 _mm_loadr_ps(float * p)MOVSD +shufflevoid _mm_store1_pd(double *p, __m128d a)MOVSS +shufflevoid _mm_store_ps1(float * p, __m128 a)void _mm_store1_ps(float *p, __m128 a)MOVAPD +shufflevoid _mm_storer_pd(double * p, __m128d a)MOVAPS +shufflevoid _mm_storer_ps(float * p, __m128 a)Vol. 2B C-15INTEL® C/C++ COMPILER INTRINSICS AND FUNCTIONAL EQUIVALENTSC-16 Vol.
2BINDEX FOR VOLUMES 2A & 2BNumerics64-bit modecontrol and debug registers, 2-15default operand size, 2-15direct memory-offset MOVs, 2-13general purpose encodings, B-24immediates, 2-14introduction, 2-9machine instructions, B-1reg (reg) field, B-4REX prefixes, 2-9, B-2RIP-relative addressing, 2-14SIMD encodings, B-54special instruction encodings, B-91summary table notation, 3-6AAAA instruction, 3-19AAD instruction, 3-21AAM instruction, 3-23AAS instruction, 3-25Access rights, segment descriptor, 3-520ADC instruction, 3-27, 3-549ADD instruction, 3-19, 3-30, 3-257, 3-549ADDPD instruction, 3-33ADDPS instruction, 3-36Addressing methodsRIP-relative, 2-14Addressing, segments, 1-5ADDSD instruction, 3-39ADDSS instruction, 3-42ADDSUBPD instruction, 3-45ADDSUBPS instruction, 3-49AND instruction, 3-53, 3-549ANDNPD instruction, 3-60ANDNPS instruction, 3-62ANDPD instruction, 3-56ANDPS instruction, 3-58Arctangent, x87 FPU operation, 3-356ARPL instruction, 3-64BB (default stack size) flag, segment descriptor, 4-218Base (operand addressing), 2-4BCD integerspacked, 3-257, 3-259, 3-294, 3-296unpacked, 3-19, 3-21, 3-23, 3-25Binary numbers, 1-5Bit order, 1-3BOUND instruction, 3-66BOUND range exceeded exception (#BR), 3-66Branch hints, 2-2Brand information, 3-181processor brand index, 3-184processor brand string, 3-181BSF instruction, 3-69BSR instruction, 3-71BSWAP instruction, 3-73BT instruction, 3-75BTC instruction, 3-78, 3-549BTR instruction, 3-81, 3-549BTS instruction, 3-84, 3-549Byte order, 1-3CCache and TLB information, 3-177Cache Inclusiveness, 3-164Caches, invalidating (flushing), 3-486, 4-406CALL instruction, 3-87CBW instruction, 3-105CDQ instruction, 3-255CDQE instruction, 3-105CF (carry) flag, EFLAGS register, 3-30, 3-75, 3-78,3-81, 3-84, 3-106, 3-115, 3-261, 3-458,3-464, 3-695, 4-234, 4-288, 4-305,4-308, 4-337, 4-351CLC instruction, 3-106CLD instruction, 3-107CLFLUSH instruction, 3-108CPUID flag, 3-176CLI instruction, 3-110CLTS instruction, 3-113CMC instruction, 3-115CMOVcc 
flag, 3-176CMOVcc instructions, 3-116CPUID flag, 3-176CMP instruction, 3-123CMPPD instruction, 3-126CMPPS instruction, 3-131CMPS instruction, 3-136, 4-255CMPSB instruction, 3-136CMPSD instruction, 3-136, 3-142CMPSQ instruction, 3-136CMPSS instruction, 3-146CMPSW instruction, 3-136CMPXCHG instruction, 3-150, 3-549CMPXCHG16B instruction, 3-153CPUID bit, 3-173CMPXCHG8B instruction, 3-153CPUID flag, 3-175COMISD instruction, 3-156COMISS instruction, 3-159Compatibility modeintroduction, 2-9see 64-bit modeVol.