浏览该文件的文档.
33 #ifndef _AK_SIMD_AVX2_H_
34 #define _AK_SIMD_AVX2_H_
39 #if !defined(__AVX2__)
40 #error "Inclusion of AkSimdAvx2.h requires AVX2 instruction sets to be defined on platform"
// Multiplies the complex values in cIn1 by those in cIn2. The arithmetic below
// treats every adjacent pair of floats as (real, imaginary).
// NOTE(review): the enclosing signature and return statement are not visible here.
// Copy each even-indexed (real) element of cIn1 into both slots of its pair.
54 __m256 real1Ext = _mm256_moveldup_ps(cIn1);
// 0xB1 swaps the two floats inside every pair of cIn2: (real2, imag2) -> (imag2, real2).
55 __m256 in2Shuf = _mm256_shuffle_ps(cIn2, cIn2, 0xB1);
// Copy each odd-indexed (imaginary) element of cIn1 into both slots of its pair.
56 __m256 imag1Ext = _mm256_movehdup_ps(cIn1);
// temp = (imag1 * imag2, imag1 * real2)
57 __m256 temp = _mm256_mul_ps(imag1Ext, in2Shuf);
// fmaddsub subtracts temp in even slots and adds it in odd slots, so
// out = (real1*real2 - imag1*imag2, real1*imag2 + imag1*real2).
58 __m256 out = _mm256_fmaddsub_ps(real1Ext, cIn2, temp);
// Byte shuffle of a using the control bytes in b (forwards to _mm256_shuffle_epi8,
// which shuffles within each 128-bit lane independently).
71 #define AKSIMD_SHUFFLEB_V8I32(a, b) _mm256_shuffle_epi8(a, b)
// Blends 16-bit elements of a and b according to the immediate mask i
// (forwards to _mm256_blend_epi16; i must be a compile-time constant).
76 #define AKSIMD_BLEND_V16I16(a, b, i) _mm256_blend_epi16(a, b, i)
// Returns a with one of its 128-bit halves (chosen by the immediate idx) replaced by m128.
78 #define AKSIMD_INSERT_V2I128( a, m128, idx) _mm256_inserti128_si256(a, m128, idx)
// Builds a 256-bit vector from 128-bit halves of a and b selected by the immediate i
// (forwards to _mm256_permute2x128_si256).
82 #define AKSIMD_PERMUTE_2X128_V8I32( a, b, i ) _mm256_permute2x128_si256(a, b, i)
// Combines one 128-bit half from each of a and b through AKSIMD_PERMUTE_2X128_V8I32.
// NOTE(review): AKSIMD_PERMUTE128 is defined elsewhere; presumably (2, 0) picks the
// low halves of a and b — confirm against its definition.
85 #define AKSIMD_DEINTERLEAVELANES_LO_V8I32( a, b ) AKSIMD_PERMUTE_2X128_V8I32(a, b, AKSIMD_PERMUTE128(2, 0))
// Same as above with selector (3, 1); presumably picks the high halves of a and b — confirm.
88 #define AKSIMD_DEINTERLEAVELANES_HI_V8I32( a, b ) AKSIMD_PERMUTE_2X128_V8I32(a, b, AKSIMD_PERMUTE128(3, 1))
// Sign-extends eight 16-bit integers to eight 32-bit integers
// (forwards to _mm256_cvtepi16_epi32, whose argument is a 128-bit vector).
98 #define AKSIMD_CONVERT_V8I16_TO_V8I32( __vec__ ) _mm256_cvtepi16_epi32( (__vec__) )
// Element-wise 32-bit integer addition.
108 #define AKSIMD_ADD_V8I32( a, b ) _mm256_add_epi32( a, b )
// Signed 32-bit "a < b", expressed as "b > a" by swapping the operands of cmpgt.
110 #define AKSIMD_CMPLT_V8I32( a, b ) _mm256_cmpgt_epi32( b, a )
// Signed 32-bit "a > b"; each element of the result is an all-ones or all-zeros mask.
111 #define AKSIMD_CMPGT_V8I32( a, b ) _mm256_cmpgt_epi32( a, b )
// Bitwise OR of the full 256-bit vectors.
112 #define AKSIMD_OR_V8I32( a, b ) _mm256_or_si256(a,b)
// Bitwise XOR of the full 256-bit vectors.
113 #define AKSIMD_XOR_V8I32( a, b ) _mm256_xor_si256(a,b)
// Element-wise 32-bit integer subtraction (a - b).
114 #define AKSIMD_SUB_V8I32( a, b ) _mm256_sub_epi32(a,b)
// Bitwise AND of the full 256-bit vectors.
118 #define AKSIMD_AND_V8I32( __a__, __b__ ) _mm256_and_si256( (__a__), (__b__) )
// Element-wise 32-bit multiply keeping the low 32 bits of each product.
121 #define AKSIMD_MULLO_V8I32( a , b) _mm256_mullo_epi32(a, b)
// NOTE(review): despite the V8I32 suffix this forwards to the 16-bit multiply
// _mm256_mullo_epi16, which works on sixteen 16-bit elements — confirm callers expect that.
124 #define AKSIMD_MULLO16_V8I32( a , b) _mm256_mullo_epi16(a, b)
// Element-wise 16-bit integer subtraction (a - b).
127 #define AKSIMD_SUB_V16I16( a, b ) _mm256_sub_epi16( a, b )
// Signed 16-bit "a > b"; each element of the result is an all-ones or all-zeros mask.
131 #define AKSIMD_CMPGT_V16I16( __a__, __b__ ) _mm256_cmpgt_epi16( (__a__), (__b__) )
// Interleaves the 16-bit elements from the low half of each 128-bit lane of a and b.
142 #define AKSIMD_UNPACKLO_VECTOR16I16( a, b ) _mm256_unpacklo_epi16( a, b )
// Interleaves the 16-bit elements from the high half of each 128-bit lane of a and b.
147 #define AKSIMD_UNPACKHI_VECTOR16I16( a, b ) _mm256_unpackhi_epi16( a, b )
// Narrows the 32-bit elements of a and b to 16 bits with signed saturation
// (forwards to _mm256_packs_epi32, which packs each 128-bit lane separately).
151 #define AKSIMD_PACKS_V8I32( a, b ) _mm256_packs_epi32( a, b )
// Shifts every 32-bit element of __vec__ left by __shiftBy__ bits, shifting in zeros.
162 #define AKSIMD_SHIFTLEFT_V8I32( __vec__, __shiftBy__ ) \
163 _mm256_slli_epi32( (__vec__), (__shiftBy__) )
// Shifts every 32-bit element of __vec__ right by __shiftBy__ bits, replicating the sign bit.
167 #define AKSIMD_SHIFTRIGHTARITH_V8I32( __vec__, __shiftBy__ ) \
168 _mm256_srai_epi32( (__vec__), (__shiftBy__) )
// Fragment of a gather helper that fills a 256-bit vector from individually
// fetched 32-bit values instead of a hardware gather instruction.
// NOTE(review): the function signature, braces and the _GATHER_SIM_FETCH
// invocations are not visible in this excerpt; presumably one fetch is issued per element.
184 template <
typename T,
typename Function>
187 __m256i vals = _mm256_setzero_si256();
188 __m128i valsTemp[2] = { _mm_setzero_si128(),_mm_setzero_si128() };
// For element _x: read a 32-bit value at base_ptr + expr(_x) and insert it into
// slot _x%4 of 128-bit half _x/4.
189 #define _GATHER_SIM_FETCH(_x) \
191 AkInt32 val = *(AkInt32*)(base_ptr + expr(_x)); \
192 valsTemp[_x/4] = _mm_insert_epi32(valsTemp[_x/4], val, _x%4);\
203 #undef _GATHER_SIM_FETCH
// Join the two 128-bit halves; valsTemp[0] becomes the low half.
204 vals = _mm256_setr_m128i(valsTemp[0], valsTemp[1]);
// Fragment of a gather helper that fills a 256-bit vector from individually
// fetched 64-bit values instead of a hardware gather instruction.
// NOTE(review): the function signature, braces and the _GATHER_SIM_FETCH
// invocations are not visible in this excerpt; presumably one fetch is issued per element.
208 template <
typename T,
typename Function>
211 __m256i vals = _mm256_setzero_si256();
212 __m128i valsTemp[2] = { _mm_setzero_si128(),_mm_setzero_si128() };
// For element _x: read a 64-bit value at base_ptr + expr(_x) and insert it into
// slot _x%2 of 128-bit half _x/2.
213 #define _GATHER_SIM_FETCH(_x) \
215 AkInt64 val = *(AkInt64*)(base_ptr + expr(_x)); \
216 valsTemp[_x/2] = _mm_insert_epi64(valsTemp[_x/2], val, _x%2);\
223 #undef _GATHER_SIM_FETCH
// Join the two 128-bit halves; valsTemp[0] becomes the low half.
224 vals = _mm256_setr_m128i(valsTemp[0], valsTemp[1]);
228 template <
typename T,
typename Function>
234 template <
typename T,
typename Function>
244 #endif //_AK_SIMD_AVX2_H_
AKSIMD_V8F32 AKSIMD_GATHER_PD(const T *base_ptr, Function expr)
static AkForceInline AKSIMD_V8F32 AKSIMD_COMPLEXMUL_AVX2(const AKSIMD_V8F32 cIn1, const AKSIMD_V8F32 cIn2)
AKSIMD_V8F32 AKSIMD_GATHER_PS(const T *base_ptr, Function expr)
#define _GATHER_SIM_FETCH(_x)
AKSIMD_V8I32 AKSIMD_GATHER_EPI32(const T *__restrict base_ptr, Function expr)
AKSIMD_V8I32 AKSIMD_GATHER_EPI64(const T *base_ptr, Function expr)
介绍一下自己的项目。我们会竭力为您提供帮助。
来注册自己的项目，我们帮您快速入门，不带任何附加条件！
开始 Wwise 之旅