Go to the documentation of this file.
32 #ifndef _AK_SIMD_AVX2_H_
33 #define _AK_SIMD_AVX2_H_
38 #if !defined(__AVX2__)
39 #error "Inclusion of AkSimdAvx2.h requires AVX2 instruction sets to be defined on platform"
// NOTE(review): these statements appear to be the body of AKSIMD_COMPLEXMUL_AVX2; the
// enclosing signature, braces and return statement are not present at this point in the listing.
// Presumably cIn1/cIn2 each hold interleaved (real, imag) float pairs - TODO confirm against callers.
// Duplicate the even-indexed floats of cIn1 into both slots of each pair.
53 __m256 real1Ext = _mm256_moveldup_ps(cIn1);
// Control 0xB1 swaps the two floats of every adjacent pair of cIn2.
54 __m256 in2Shuf = _mm256_shuffle_ps(cIn2, cIn2, 0xB1);
// Duplicate the odd-indexed floats of cIn1 into both slots of each pair.
55 __m256 imag1Ext = _mm256_movehdup_ps(cIn1);
// Element-wise product of the duplicated odd elements with the pair-swapped cIn2.
56 __m256 temp = _mm256_mul_ps(imag1Ext, in2Shuf);
// Even lanes: real1Ext * cIn2 - temp; odd lanes: real1Ext * cIn2 + temp.
57 __m256 out = _mm256_fmaddsub_ps(real1Ext, cIn2, temp);
/// Fused multiply with alternating subtract/add on 8 packed floats.
/// Expands to `_mm256_fmaddsub_ps(a, b, c)`: a*b - c in even-indexed lanes, a*b + c in odd-indexed lanes.
#define AKSIMD_MADDSUB_V8F32( a, b, c ) _mm256_fmaddsub_ps( (a), (b), (c) )
/// Fused multiply with alternating add/subtract on 8 packed floats.
/// Expands to `_mm256_fmsubadd_ps(a, b, c)`: a*b + c in even-indexed lanes, a*b - c in odd-indexed lanes.
#define AKSIMD_MSUBADD_V8F32( a, b, c ) _mm256_fmsubadd_ps( (a), (b), (c) )
/// Fused multiply-add on 8 packed floats: expands to `_mm256_fmadd_ps(a, b, c)`, i.e. a*b + c per lane.
#define AKSIMD_MADD_V8F32( a, b, c ) _mm256_fmadd_ps( (a), (b), (c) )
/// Fused multiply-subtract on 8 packed floats: expands to `_mm256_fmsub_ps(a, b, c)`, i.e. a*b - c per lane.
#define AKSIMD_MSUB_V8F32( a, b, c ) _mm256_fmsub_ps( (a), (b), (c) )
/// Byte shuffle: expands to `_mm256_shuffle_epi8(a, b)`, which rearranges the bytes of `a`
/// within each 128-bit lane according to the control bytes in `b`.
#define AKSIMD_SHUFFLEB_V8I32( a, b ) _mm256_shuffle_epi8( (a), (b) )
/// Blends packed 16-bit integers from `a` and `b`: expands to `_mm256_blend_epi16(a, b, i)`.
/// `i` is forwarded as the intrinsic's immediate selection mask and must be a compile-time constant.
#define AKSIMD_BLEND_V16I16( a, b, i ) _mm256_blend_epi16( (a), (b), (i) )
/// Expands to `_mm256_inserti128_si256(a, m128, idx)`: yields a copy of `a` with the 128-bit half
/// selected by the immediate `idx` replaced by `m128`.
#define AKSIMD_INSERT_V2I128( a, m128, idx ) _mm256_inserti128_si256( (a), (m128), (idx) )
/// Expands to `_mm256_permute2x128_si256(a, b, i)`: builds a 256-bit result from 128-bit lanes of
/// `a` and `b`, chosen by the immediate control `i`.
#define AKSIMD_PERMUTE_2X128_V8I32( a, b, i ) _mm256_permute2x128_si256( (a), (b), (i) )
/// Forwards `a` and `b` to AKSIMD_PERMUTE_2X128_V8I32 with the selector AKSIMD_PERMUTE128(2, 0).
/// NOTE(review): AKSIMD_PERMUTE128 is defined outside this listing; presumably this gathers the
/// low 128-bit lanes of `a` and `b` - confirm against its definition.
#define AKSIMD_DEINTERLEAVELANES_LO_V8I32( a, b ) AKSIMD_PERMUTE_2X128_V8I32( (a), (b), AKSIMD_PERMUTE128(2, 0) )
/// Forwards `a` and `b` to AKSIMD_PERMUTE_2X128_V8I32 with the selector AKSIMD_PERMUTE128(3, 1).
/// NOTE(review): presumably gathers the high 128-bit lanes of `a` and `b` - confirm as above.
#define AKSIMD_DEINTERLEAVELANES_HI_V8I32( a, b ) AKSIMD_PERMUTE_2X128_V8I32( (a), (b), AKSIMD_PERMUTE128(3, 1) )
/// Permutes the four 64-bit chunks (i.e. pairs of floats) of `a` using the immediate control `i`.
/// The float vector is reinterpreted as packed doubles so that `_mm256_permute4x64_pd` can be used,
/// then reinterpreted back; the casts only change the vector's static type.
#define AKSIMD_PERMUTE_4X64_V8F32( a, i ) _mm256_castpd_ps( _mm256_permute4x64_pd( _mm256_castps_pd( (a) ), (i) ) )
/// Widens 8 packed signed 16-bit integers to 8 packed 32-bit integers with sign extension.
/// Expands to `_mm256_cvtepi16_epi32(vec)`.
#define AKSIMD_CONVERT_V8I16_TO_V8I32( vec ) _mm256_cvtepi16_epi32( (vec) )
/// Lane-wise 32-bit integer addition: `_mm256_add_epi32(a, b)`.
#define AKSIMD_ADD_V8I32( a, b ) _mm256_add_epi32( (a), (b) )

/// Lane-wise signed 32-bit "a < b". AVX2 only provides a greater-than compare, so the operands
/// are swapped: a < b is evaluated as b > a.
#define AKSIMD_CMPLT_V8I32( a, b ) _mm256_cmpgt_epi32( (b), (a) )
/// Lane-wise signed 32-bit "a > b": `_mm256_cmpgt_epi32(a, b)`.
#define AKSIMD_CMPGT_V8I32( a, b ) _mm256_cmpgt_epi32( (a), (b) )
/// Bitwise OR of two 256-bit integer vectors: `_mm256_or_si256(a, b)`.
#define AKSIMD_OR_V8I32( a, b ) _mm256_or_si256( (a), (b) )
/// Bitwise XOR of two 256-bit integer vectors: `_mm256_xor_si256(a, b)`.
#define AKSIMD_XOR_V8I32( a, b ) _mm256_xor_si256( (a), (b) )
/// Lane-wise 32-bit integer subtraction a - b: `_mm256_sub_epi32(a, b)`.
#define AKSIMD_SUB_V8I32( a, b ) _mm256_sub_epi32( (a), (b) )

/// Bitwise AND of two 256-bit integer vectors: `_mm256_and_si256(a, b)`.
#define AKSIMD_AND_V8I32( a, b ) _mm256_and_si256( (a), (b) )
/// Lane-wise 32-bit multiply keeping the low 32 bits of each product: `_mm256_mullo_epi32(a, b)`.
#define AKSIMD_MULLO_V8I32( a, b ) _mm256_mullo_epi32( (a), (b) )
/// Expands to `_mm256_mullo_epi16(a, b)`, which multiplies packed 16-bit integers and keeps the
/// low 16 bits of each product.
/// NOTE(review): despite the V8I32 suffix the intrinsic treats the operands as 16-bit lanes;
/// presumably callers keep the upper 16 bits of each 32-bit element zero - confirm.
#define AKSIMD_MULLO16_V8I32( a, b ) _mm256_mullo_epi16( (a), (b) )
/// Lane-wise 16-bit integer subtraction a - b on 16 packed values: `_mm256_sub_epi16(a, b)`.
#define AKSIMD_SUB_V16I16( a, b ) _mm256_sub_epi16( (a), (b) )
/// Lane-wise signed 16-bit "a > b" on 16 packed values: `_mm256_cmpgt_epi16(a, b)`.
#define AKSIMD_CMPGT_V16I16( a, b ) _mm256_cmpgt_epi16( (a), (b) )
/// Interleaves 16-bit integers taken from the low half of each 128-bit lane of `a` and `b`:
/// `_mm256_unpacklo_epi16(a, b)`.
#define AKSIMD_UNPACKLO_VECTOR16I16( a, b ) _mm256_unpacklo_epi16( (a), (b) )
/// Interleaves 16-bit integers taken from the high half of each 128-bit lane of `a` and `b`:
/// `_mm256_unpackhi_epi16(a, b)`.
#define AKSIMD_UNPACKHI_VECTOR16I16( a, b ) _mm256_unpackhi_epi16( (a), (b) )
/// Narrows the packed 32-bit integers of `a` and `b` to 16-bit integers with signed saturation:
/// `_mm256_packs_epi32(a, b)`. The intrinsic packs within each 128-bit lane.
#define AKSIMD_PACKS_V8I32( a, b ) _mm256_packs_epi32( (a), (b) )
/// Shifts each packed 32-bit integer of `vec` left by `shiftBy` bits, shifting in zeros:
/// `_mm256_slli_epi32(vec, shiftBy)`.
#define AKSIMD_SHIFTLEFT_V8I32( vec, shiftBy ) \
	_mm256_slli_epi32( (vec), (shiftBy) )
178 #define AKSIMD_SHIFTLEFT16_V8I32( __vec__ ) \
179 _mm256_shuffle_epi8( (__vec__), _mm256_set_epi8( \
/// Logical right shift: shifts each packed 32-bit integer of `vec` right by `shiftBy` bits,
/// shifting in zeros. Expands to `_mm256_srli_epi32(vec, shiftBy)`.
#define AKSIMD_SHIFTRIGHT_V8I32( vec, shiftBy ) \
	_mm256_srli_epi32( (vec), (shiftBy) )
/// Arithmetic right shift: shifts each packed 32-bit integer of `vec` right by `shiftBy` bits,
/// shifting in copies of the sign bit. Expands to `_mm256_srai_epi32(vec, shiftBy)`.
#define AKSIMD_SHIFTRIGHTARITH_V8I32( vec, shiftBy ) \
	_mm256_srai_epi32( (vec), (shiftBy) )
213 template <
typename T,
typename Function>
216 __m256i vals = _mm256_setzero_si256();
217 __m128i valsTemp[2] = { _mm_setzero_si128(),_mm_setzero_si128() };
218 #define _GATHER_SIM_FETCH(_x) \
221 memcpy(&val, (base_ptr + expr(_x)), sizeof(val)); \
222 valsTemp[_x/4] = _mm_insert_epi32(valsTemp[_x/4], val, _x%4);\
233 #undef _GATHER_SIM_FETCH
234 vals = _mm256_setr_m128i(valsTemp[0], valsTemp[1]);
238 template <
typename T,
typename Function>
241 __m256i vals = _mm256_setzero_si256();
242 __m128i valsTemp[2] = { _mm_setzero_si128(),_mm_setzero_si128() };
243 #define _GATHER_SIM_FETCH(_x) \
246 memcpy(&val, (base_ptr + expr(_x)), sizeof(val)); \
247 valsTemp[_x/2] = _mm_insert_epi64(valsTemp[_x/2], val, _x%2);\
254 #undef _GATHER_SIM_FETCH
255 vals = _mm256_setr_m128i(valsTemp[0], valsTemp[1]);
259 template <
typename T,
typename Function>
265 template <
typename T,
typename Function>
275 #endif //_AK_SIMD_AVX2_H_
static AkForceInline AKSIMD_V8F32 AKSIMD_COMPLEXMUL_AVX2(const AKSIMD_V8F32 cIn1, const AKSIMD_V8F32 cIn2)
AKSIMD_V8F32 AKSIMD_GATHER_PS(const T *base_ptr, Function expr)
#define _GATHER_SIM_FETCH(_x)
AKSIMD_V4F64 AKSIMD_GATHER_PD(const T *base_ptr, Function expr)
AKSIMD_V8I32 AKSIMD_GATHER_EPI32(const T *__restrict base_ptr, Function expr)
AKSIMD_V8I32 AKSIMD_GATHER_EPI64(const T *base_ptr, Function expr)
Tell us about your project. We're here to help.
Register your project and we'll help you get started with no strings attached!
Get started with Wwise