이 파일의 문서화 페이지로 가기
32 #ifndef _AK_SIMD_AVX2_H_
33 #define _AK_SIMD_AVX2_H_
38 #if !defined(__AVX2__)
39 #error "Inclusion of AkSimdAvx2.h requires AVX2 instruction sets to be defined on platform"
// Packed complex multiplication of cIn1 by cIn2 (these are the visible body statements; the
// enclosing signature, braces and return statement are not part of this listing fragment).
// NOTE(review): the lane comments assume each adjacent float pair is (real, imag) -- confirm against callers.
// Duplicate the even-indexed elements of cIn1 into both slots of each pair: (re1, re1).
53 __m256 real1Ext = _mm256_moveldup_ps(cIn1);
// Control 0xB1 swaps the two elements of every adjacent pair of cIn2: (im2, re2).
54 __m256 in2Shuf = _mm256_shuffle_ps(cIn2, cIn2, 0xB1);
// Duplicate the odd-indexed elements of cIn1 into both slots of each pair: (im1, im1).
55 __m256 imag1Ext = _mm256_movehdup_ps(cIn1);
// Cross terms: (im1*im2, im1*re2).
56 __m256 temp = _mm256_mul_ps(imag1Ext, in2Shuf);
// fmaddsub subtracts temp in even-indexed elements and adds it in odd-indexed elements,
// giving (re1*re2 - im1*im2, re1*im2 + im1*re2) per pair.
57 __m256 out = _mm256_fmaddsub_ps(real1Ext, cIn2, temp);
/// Fused multiply with alternating subtract/add on 8 packed floats:
/// even-indexed elements yield a*b - c, odd-indexed elements yield a*b + c.
62 #define AKSIMD_MADDSUB_V8F32( __a__, __b__, __c__ ) _mm256_fmaddsub_ps( (__a__), (__b__), (__c__) )
/// Fused multiply with alternating add/subtract on 8 packed floats:
/// even-indexed elements yield a*b + c, odd-indexed elements yield a*b - c.
63 #define AKSIMD_MSUBADD_V8F32( __a__, __b__, __c__ ) _mm256_fmsubadd_ps( (__a__), (__b__), (__c__) )
/// Fused multiply-add on 8 packed floats: a*b + c.
66 #define AKSIMD_MADD_V8F32( __a__, __b__, __c__ ) _mm256_fmadd_ps( (__a__), (__b__) , (__c__) )
/// Fused multiply-subtract on 8 packed floats: a*b - c.
67 #define AKSIMD_MSUB_V8F32( __a__, __b__, __c__ ) _mm256_fmsub_ps( (__a__), (__b__) , (__c__) )
/// Byte shuffle of a using the per-byte control in b. The shuffle operates independently
/// within each 128-bit lane; a control byte with its high bit set produces a zero byte.
78 #define AKSIMD_SHUFFLEB_V8I32(a, b) _mm256_shuffle_epi8(a, b)
/// Blends 16-bit elements of a and b according to the 8-bit immediate i
/// (bit set -> take from b); the same 8-bit mask is applied to both 128-bit lanes.
83 #define AKSIMD_BLEND_V16I16(a, b, i) _mm256_blend_epi16(a, b, i)
/// Returns a copy of a with one 128-bit half replaced by m128; idx (immediate) selects which half.
85 #define AKSIMD_INSERT_V2I128( a, m128, idx) _mm256_inserti128_si256(a, m128, idx)
/// Builds a 256-bit result from 128-bit lanes picked out of a and b, as selected by the immediate i.
89 #define AKSIMD_PERMUTE_2X128_V8I32( a, b, i ) _mm256_permute2x128_si256(a, b, i)
/// 128-bit lane permute of a and b with selector AKSIMD_PERMUTE128(2, 0).
/// NOTE(review): AKSIMD_PERMUTE128 is defined outside this listing; presumably this yields
/// { low lane of a, low lane of b } -- confirm against its definition.
92 #define AKSIMD_DEINTERLEAVELANES_LO_V8I32( a, b ) AKSIMD_PERMUTE_2X128_V8I32(a, b, AKSIMD_PERMUTE128(2, 0))
/// 128-bit lane permute of a and b with selector AKSIMD_PERMUTE128(3, 1).
/// NOTE(review): presumably yields { high lane of a, high lane of b } -- confirm against AKSIMD_PERMUTE128.
95 #define AKSIMD_DEINTERLEAVELANES_HI_V8I32( a, b ) AKSIMD_PERMUTE_2X128_V8I32(a, b, AKSIMD_PERMUTE128(3, 1))
/// Permutes the four 64-bit chunks (i.e. pairs of floats) of a across lanes using the immediate i.
/// The float vector is reinterpreted as packed doubles for the permute and cast back; the casts do not convert values.
99 #define AKSIMD_PERMUTE_4X64_V8F32( a, i ) _mm256_castpd_ps(_mm256_permute4x64_pd(_mm256_castps_pd(a), i))
/// Sign-extends eight packed 16-bit integers (a 128-bit vector) to eight packed 32-bit integers (a 256-bit vector).
109 #define AKSIMD_CONVERT_V8I16_TO_V8I32( __vec__ ) _mm256_cvtepi16_epi32( (__vec__) )
/// Element-wise addition of eight packed 32-bit integers.
119 #define AKSIMD_ADD_V8I32( a, b ) _mm256_add_epi32( a, b )
/// Signed element-wise a < b, implemented as "b > a" with swapped operands.
/// Each result element is all ones when true and zero otherwise.
121 #define AKSIMD_CMPLT_V8I32( a, b ) _mm256_cmpgt_epi32( b, a )
/// Signed element-wise a > b; each result element is all ones when true and zero otherwise.
122 #define AKSIMD_CMPGT_V8I32( a, b ) _mm256_cmpgt_epi32( a, b )
/// Bitwise OR of the full 256-bit vectors.
123 #define AKSIMD_OR_V8I32( a, b ) _mm256_or_si256(a,b)
/// Bitwise XOR of the full 256-bit vectors.
124 #define AKSIMD_XOR_V8I32( a, b ) _mm256_xor_si256(a,b)
/// Element-wise subtraction (a - b) of eight packed 32-bit integers.
125 #define AKSIMD_SUB_V8I32( a, b ) _mm256_sub_epi32(a,b)
/// Bitwise AND of the full 256-bit vectors.
129 #define AKSIMD_AND_V8I32( __a__, __b__ ) _mm256_and_si256( (__a__), (__b__) )
/// Element-wise 32-bit multiply, keeping the low 32 bits of each product.
132 #define AKSIMD_MULLO_V8I32( a , b) _mm256_mullo_epi32(a, b)
/// Multiplies the vectors as sixteen packed 16-bit integers, keeping the low 16 bits of each product.
135 #define AKSIMD_MULLO16_V8I32( a , b) _mm256_mullo_epi16(a, b)
/// Element-wise subtraction (a - b) of sixteen packed 16-bit integers.
138 #define AKSIMD_SUB_V16I16( a, b ) _mm256_sub_epi16( a, b )
/// Signed element-wise a > b on sixteen packed 16-bit integers; all ones when true, zero otherwise.
142 #define AKSIMD_CMPGT_V16I16( __a__, __b__ ) _mm256_cmpgt_epi16( (__a__), (__b__) )
/// Interleaves the 16-bit elements from the low half of each 128-bit lane of a and b.
153 #define AKSIMD_UNPACKLO_VECTOR16I16( a, b ) _mm256_unpacklo_epi16( a, b )
/// Interleaves the 16-bit elements from the high half of each 128-bit lane of a and b.
158 #define AKSIMD_UNPACKHI_VECTOR16I16( a, b ) _mm256_unpackhi_epi16( a, b )
/// Narrows the 32-bit integers of a and b to 16-bit with signed saturation.
/// Packing is done per 128-bit lane, so elements of a and b alternate lane by lane in the result.
162 #define AKSIMD_PACKS_V8I32( a, b ) _mm256_packs_epi32( a, b )
/// Shifts each of the eight packed 32-bit integers left by __shiftBy__ bits, shifting in zeros.
173 #define AKSIMD_SHIFTLEFT_V8I32( __vec__, __shiftBy__ ) \
174 _mm256_slli_epi32( (__vec__), (__shiftBy__) )
178 #define AKSIMD_SHIFTLEFT16_V8I32( __vec__ ) \
179 _mm256_shuffle_epi8( (__vec__), _mm256_set_epi8( \
/// Logical right shift of each of the eight packed 32-bit integers by __shiftBy__ bits, shifting in zeros.
191 #define AKSIMD_SHIFTRIGHT_V8I32( __vec__, __shiftBy__ ) \
192 _mm256_srli_epi32( (__vec__), (__shiftBy__) )
/// Arithmetic right shift of each of the eight packed 32-bit integers by __shiftBy__ bits,
/// shifting in copies of the sign bit.
196 #define AKSIMD_SHIFTRIGHTARITH_V8I32( __vec__, __shiftBy__ ) \
197 _mm256_srai_epi32( (__vec__), (__shiftBy__) )
213 template <
typename T,
typename Function>
216 __m256i vals = _mm256_setzero_si256();
217 __m128i valsTemp[2] = { _mm_setzero_si128(),_mm_setzero_si128() };
218 #define _GATHER_SIM_FETCH(_x) \
221 memcpy(&val, (base_ptr + expr(_x)), sizeof(val)); \
222 valsTemp[_x/4] = _mm_insert_epi32(valsTemp[_x/4], val, _x%4);\
233 #undef _GATHER_SIM_FETCH
234 vals = _mm256_setr_m128i(valsTemp[0], valsTemp[1]);
238 template <
typename T,
typename Function>
241 __m256i vals = _mm256_setzero_si256();
242 __m128i valsTemp[2] = { _mm_setzero_si128(),_mm_setzero_si128() };
243 #define _GATHER_SIM_FETCH(_x) \
246 memcpy(&val, (base_ptr + expr(_x)), sizeof(val)); \
247 valsTemp[_x/2] = _mm_insert_epi64(valsTemp[_x/2], val, _x%2);\
254 #undef _GATHER_SIM_FETCH
255 vals = _mm256_setr_m128i(valsTemp[0], valsTemp[1]);
259 template <
typename T,
typename Function>
265 template <
typename T,
typename Function>
275 #endif //_AK_SIMD_AVX2_H_
static AkForceInline AKSIMD_V8F32 AKSIMD_COMPLEXMUL_AVX2(const AKSIMD_V8F32 cIn1, const AKSIMD_V8F32 cIn2)
AKSIMD_V8F32 AKSIMD_GATHER_PS(const T *base_ptr, Function expr)
#define _GATHER_SIM_FETCH(_x)
AKSIMD_V4F64 AKSIMD_GATHER_PD(const T *base_ptr, Function expr)
AKSIMD_V8I32 AKSIMD_GATHER_EPI32(const T *__restrict base_ptr, Function expr)
AKSIMD_V8I32 AKSIMD_GATHER_EPI64(const T *base_ptr, Function expr)
지원이 필요하신가요?
질문이 있으신가요? 문제를 겪고 계신가요? 더 많은 정보가 필요하신가요? 저희에게 문의해주시면 도와드리겠습니다!
지원 페이지를 방문해 주세요
작업하는 프로젝트에 대해 알려주세요. 언제든지 도와드릴 준비가 되어 있습니다.
프로젝트를 등록하세요. 아무런 조건이나 의무 사항 없이 빠른 시작을 도와드리겠습니다.
Wwise를 시작해 보세요