[詳解]
33 #ifndef _AK_SIMD_AVX2_H_
34 #define _AK_SIMD_AVX2_H_
39 #if !defined(__AVX2__)
40 #error "Inclusion of AkSimdAvx2.h requires AVX2 instruction sets to be defined on platform"
54 __m256 real1Ext = _mm256_moveldup_ps(cIn1);
55 __m256 in2Shuf = _mm256_shuffle_ps(cIn2, cIn2, 0xB1);
56 __m256 imag1Ext = _mm256_movehdup_ps(cIn1);
57 __m256 temp = _mm256_mul_ps(imag1Ext, in2Shuf);
58 __m256 out = _mm256_fmaddsub_ps(real1Ext, cIn2, temp);
// Fused multiply-add family on eight packed floats. Each macro forwards its
// three arguments unchanged to the matching _mm256_fm*_ps intrinsic.
// NOTE(review): these are FMA intrinsics, while the only guard visible in this
// listing checks __AVX2__ — confirm that FMA support is guaranteed wherever
// this header is included.
//
// MADDSUB: a*b - c in even-indexed lanes, a*b + c in odd-indexed lanes.
63 #define AKSIMD_MADDSUB_V8F32( __a__, __b__, __c__ ) _mm256_fmaddsub_ps( (__a__), (__b__), (__c__) )
// MSUBADD: a*b + c in even-indexed lanes, a*b - c in odd-indexed lanes.
64 #define AKSIMD_MSUBADD_V8F32( __a__, __b__, __c__ ) _mm256_fmsubadd_ps( (__a__), (__b__), (__c__) )
// MADD: a*b + c in every lane.
67 #define AKSIMD_MADD_V8F32( __a__, __b__, __c__ ) _mm256_fmadd_ps( (__a__), (__b__) , (__c__) )
// MSUB: a*b - c in every lane.
68 #define AKSIMD_MSUB_V8F32( __a__, __b__, __c__ ) _mm256_fmsub_ps( (__a__), (__b__) , (__c__) )
// Byte shuffle: rearranges the bytes of `a` using the per-byte control vector
// `b`. The underlying instruction works independently on each 128-bit half.
79 #define AKSIMD_SHUFFLEB_V8I32(a, b) _mm256_shuffle_epi8(a, b)
// Per-element select between `a` and `b` for 16-bit elements, driven by the
// immediate mask `i` (must be a compile-time constant).
84 #define AKSIMD_BLEND_V16I16(a, b, i) _mm256_blend_epi16(a, b, i)
// Returns `a` with the 128-bit half selected by the immediate `idx` replaced
// by `m128`.
86 #define AKSIMD_INSERT_V2I128( a, m128, idx) _mm256_inserti128_si256(a, m128, idx)
// Builds a 256-bit result out of 128-bit halves picked from `a` and `b`
// according to the immediate control `i`.
90 #define AKSIMD_PERMUTE_2X128_V8I32( a, b, i ) _mm256_permute2x128_si256(a, b, i)
// NOTE(review): AKSIMD_PERMUTE128 is defined outside this listing; presumably
// (2, 0) encodes "low half of a, then low half of b" — confirm against its
// definition.
93 #define AKSIMD_DEINTERLEAVELANES_LO_V8I32( a, b ) AKSIMD_PERMUTE_2X128_V8I32(a, b, AKSIMD_PERMUTE128(2, 0))
// NOTE(review): presumably (3, 1) encodes "high half of a, then high half of
// b" — confirm against the AKSIMD_PERMUTE128 definition.
96 #define AKSIMD_DEINTERLEAVELANES_HI_V8I32( a, b ) AKSIMD_PERMUTE_2X128_V8I32(a, b, AKSIMD_PERMUTE128(3, 1))
// Permutes a float vector in 64-bit units (i.e. pairs of adjacent floats) by
// reinterpreting it as packed doubles, applying the 4x64 permute with the
// immediate `i`, and reinterpreting the result back as floats. The casts are
// bit-level reinterpretations only.
100 #define AKSIMD_PERMUTE_4X64_V8F32( a, i ) _mm256_castpd_ps(_mm256_permute4x64_pd(_mm256_castps_pd(a), i))
// Widens eight signed 16-bit integers (held in a 128-bit vector) to eight
// 32-bit integers using sign extension.
110 #define AKSIMD_CONVERT_V8I16_TO_V8I32( __vec__ ) _mm256_cvtepi16_epi32( (__vec__) )
// Element-wise operations on eight packed 32-bit integers (unless noted).
//
// Lane-wise 32-bit addition.
120 #define AKSIMD_ADD_V8I32( a, b ) _mm256_add_epi32( a, b )
// Signed "a < b", expressed as "b > a" by swapping the operands of the
// greater-than compare.
122 #define AKSIMD_CMPLT_V8I32( a, b ) _mm256_cmpgt_epi32( b, a )
// Signed "a > b"; each result lane is all ones when true, zero otherwise.
123 #define AKSIMD_CMPGT_V8I32( a, b ) _mm256_cmpgt_epi32( a, b )
// Bitwise OR / XOR over the full 256 bits.
124 #define AKSIMD_OR_V8I32( a, b ) _mm256_or_si256(a,b)
125 #define AKSIMD_XOR_V8I32( a, b ) _mm256_xor_si256(a,b)
// Lane-wise 32-bit subtraction (a - b).
126 #define AKSIMD_SUB_V8I32( a, b ) _mm256_sub_epi32(a,b)
// Bitwise AND over the full 256 bits.
130 #define AKSIMD_AND_V8I32( __a__, __b__ ) _mm256_and_si256( (__a__), (__b__) )
// Lane-wise 32-bit multiply keeping the low 32 bits of each product.
133 #define AKSIMD_MULLO_V8I32( a , b) _mm256_mullo_epi32(a, b)
// Multiplies in 16-bit lanes (sixteen of them), keeping the low 16 bits of
// each product — despite the V8I32 suffix, the operation is not 32-bit wide.
136 #define AKSIMD_MULLO16_V8I32( a , b) _mm256_mullo_epi16(a, b)
// Lane-wise subtraction (a - b) on sixteen packed 16-bit integers.
139 #define AKSIMD_SUB_V16I16( a, b ) _mm256_sub_epi16( a, b )
// Signed "a > b" on sixteen 16-bit lanes; a true lane is all ones.
143 #define AKSIMD_CMPGT_V16I16( __a__, __b__ ) _mm256_cmpgt_epi16( (__a__), (__b__) )
// Interleaves 16-bit elements of `a` and `b` taken from the low half of each
// 128-bit half of the inputs.
154 #define AKSIMD_UNPACKLO_VECTOR16I16( a, b ) _mm256_unpacklo_epi16( a, b )
// Same as above, but taking elements from the high half of each 128-bit half.
159 #define AKSIMD_UNPACKHI_VECTOR16I16( a, b ) _mm256_unpackhi_epi16( a, b )
// Narrows the 32-bit integers of `a` and `b` to 16 bits with signed
// saturation. The underlying instruction packs each 128-bit half separately,
// so the output order is not simply "all of a, then all of b".
163 #define AKSIMD_PACKS_V8I32( a, b ) _mm256_packs_epi32( a, b )
// Bit shifts applied to each of the eight 32-bit lanes by the same count.
//
// Left shift; vacated low bits are filled with zeros.
174 #define AKSIMD_SHIFTLEFT_V8I32( __vec__, __shiftBy__ ) \
175 _mm256_slli_epi32( (__vec__), (__shiftBy__) )
// Logical right shift; vacated high bits are filled with zeros.
179 #define AKSIMD_SHIFTRIGHT_V8I32( __vec__, __shiftBy__ ) \
180 _mm256_srli_epi32( (__vec__), (__shiftBy__) )
// Arithmetic right shift; vacated high bits are filled with the sign bit.
184 #define AKSIMD_SHIFTRIGHTARITH_V8I32( __vec__, __shiftBy__ ) \
185 _mm256_srai_epi32( (__vec__), (__shiftBy__) )
201 template <
typename T,
typename Function>
204 __m256i vals = _mm256_setzero_si256();
205 __m128i valsTemp[2] = { _mm_setzero_si128(),_mm_setzero_si128() };
206 #define _GATHER_SIM_FETCH(_x) \
209 ::memcpy(&val, (base_ptr + expr(_x)), sizeof(val)); \
210 valsTemp[_x/4] = _mm_insert_epi32(valsTemp[_x/4], val, _x%4);\
221 #undef _GATHER_SIM_FETCH
222 vals = _mm256_setr_m128i(valsTemp[0], valsTemp[1]);
226 template <
typename T,
typename Function>
229 __m256i vals = _mm256_setzero_si256();
230 __m128i valsTemp[2] = { _mm_setzero_si128(),_mm_setzero_si128() };
231 #define _GATHER_SIM_FETCH(_x) \
234 ::memcpy(&val, (base_ptr + expr(_x)), sizeof(val)); \
235 valsTemp[_x/2] = _mm_insert_epi64(valsTemp[_x/2], val, _x%2);\
242 #undef _GATHER_SIM_FETCH
243 vals = _mm256_setr_m128i(valsTemp[0], valsTemp[1]);
247 template <
typename T,
typename Function>
253 template <
typename T,
typename Function>
263 #endif //_AK_SIMD_AVX2_H_
static AkForceInline AKSIMD_V8F32 AKSIMD_COMPLEXMUL_AVX2(const AKSIMD_V8F32 cIn1, const AKSIMD_V8F32 cIn2)
AKSIMD_V8F32 AKSIMD_GATHER_PS(const T *base_ptr, Function expr)
#define _GATHER_SIM_FETCH(_x)
AKSIMD_V4F64 AKSIMD_GATHER_PD(const T *base_ptr, Function expr)
AKSIMD_V8I32 AKSIMD_GATHER_EPI32(const T *__restrict base_ptr, Function expr)
AKSIMD_V8I32 AKSIMD_GATHER_EPI64(const T *base_ptr, Function expr)
あなたのプロジェクトについて教えてください。ご不明な点はありませんか。
プロジェクトを登録していただくことで、ご利用開始のサポートをいたします。
Wwiseからはじめよう