Defines | |
Platform specific memory size alignment for allocation purposes | |
#define | AKSIMD_ALIGNSIZE(__Size__) (((__Size__) + 15) & ~15) |
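As a brief sketch (the variable names are placeholders), the macro rounds a byte count up to the next multiple of 16, the size of one 128-bit SIMD register:

    AkUInt32 uRawSize     = 37;
    AkUInt32 uAlignedSize = AKSIMD_ALIGNSIZE( uRawSize ); // (37 + 15) & ~15 == 48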
AKSIMD loading / setting | |
#define | AKSIMD_LOAD_V4F32(__addr__) vld1q_f32( (float32_t*)(__addr__) ) |
Loads four single-precision, floating-point values (see _mm_load_ps). |
|
#define | AKSIMD_LOADU_V4F32(__addr__) vld1q_f32( (float32_t*)(__addr__) ) |
#define | AKSIMD_LOAD1_V4F32(__scalar__) vld1q_dup_f32( (float32_t*)(&(__scalar__)) ) |
#define | AKSIMD_SET_V4F32(__scalar__) vdupq_n_f32( __scalar__ ) |
#define | AKSIMD_SET_V4I32(__scalar__) vdupq_n_s32( __scalar__ ) |
Sets the four integer values to __scalar__. |
|
#define | AKSIMD_SETZERO_V4F32() AKSIMD_SET_V4F32( 0 ) |
#define | AKSIMD_LOAD_SS_V4F32(__addr__) vld1q_lane_f32( (float32_t*)(__addr__), AKSIMD_SETZERO_V4F32(), 0 ); |
#define | AKSIMD_LOAD_V4I32(__addr__) vld1q_s32( (const int32_t*)(__addr__) ) |
Loads four 32-bit signed integer values (aligned). |
|
#define | AKSIMD_LOAD_V8I16(__addr__) vld1q_s16( (const int16_t*)(__addr__) ) |
Loads 8 16-bit signed integer values (aligned). |
|
#define | AKSIMD_LOAD_V4I16(__addr__) vld1_s16( (const int16_t*)(__addr__) ) |
Loads 4 16-bit signed integer values (aligned). |
|
#define | AKSIMD_LOADU_V4I32(__addr__) *__addr__ |
Loads unaligned 128-bit value (see _mm_loadu_si128). |
|
#define | AKSIMD_SETZERO_V4I32() vdupq_n_s32( 0 ) |
Sets the four 32-bit integer values to zero (see _mm_setzero_si128). |
|
#define | AKSIMD_LOAD_V2F32(__addr__) vld1_f32( (float32_t*)(__addr__) ) |
Loads two single-precision, floating-point values. |
|
#define | AKSIMD_LOAD_V2F32_LANE(__addr__, __vec__, __lane__) vld1_lane_f32( (float32_t*)(__addr__), (__vec__), (__lane__) ); |
#define | AKSIMD_SET_V2F32(__scalar__) vdup_n_f32( __scalar__ ) |
Sets the two single-precision, floating-point values to __scalar__. |
|
#define | AKSIMD_SETZERO_V2F32() AKSIMD_SET_V2F32( 0 ) |
Sets the two single-precision, floating-point values to zero. |
|
#define | AKSIMD_LOAD_V4F32X2(__addr__) vld2q_f32( (float32_t*)(__addr__) ) |
Loads data from memory and de-interleaves. |
|
#define | AKSIMD_LOAD_V2F32X2(__addr__) vld2_f32( (float32_t*)(__addr__) ) |
#define | AKSIMD_LOAD_V2F32X2_LANE(__addr__, __vec__, __lane__) vld2_lane_f32( (float32_t*)(__addr__), (__vec__), (__lane__) ); |
Loads data from memory and de-interleaves; only selected lane. |
|
#define | AKSIMD_LOAD_V4F32X4_LANE(__addr__, __vec__, __lane__) vld4q_lane_f32( (float32_t*)(__addr__), (__vec__), (__lane__) ); |
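A minimal usage sketch of the load/set macros above; the array and variable names are placeholders, and alignas(16) stands in for whatever alignment facility the surrounding code uses when calling the aligned variants:

    alignas(16) AkReal32 afIn[4] = { 1.f, 2.f, 3.f, 4.f };
    AKSIMD_V4F32 vIn   = AKSIMD_LOAD_V4F32( afIn );   // aligned load of 4 floats
    AKSIMD_V4F32 vGain = AKSIMD_SET_V4F32( 0.5f );    // broadcast one scalar to all lanes
    AKSIMD_V4F32 vZero = AKSIMD_SETZERO_V4F32();      // all lanes set to 0.f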
AKSIMD storing | |
#define | AKSIMD_STORE_V4F32(__addr__, __vName__) vst1q_f32( (float32_t*)(__addr__), (__vName__) ) |
Stores four single-precision, floating-point values. The address must be 16-byte aligned. |
|
#define | AKSIMD_STOREU_V4F32(__addr__, __vec__) vst1q_f32( (float32_t*)(__addr__), (__vec__) ) |
Stores four single-precision, floating-point values. The address does not need to be 16-byte aligned. |
|
#define | AKSIMD_STORE1_V4F32(__addr__, __vec__) vst1q_lane_f32( (float32_t*)(__addr__), (__vec__), 0 ) |
#define | AKSIMD_STORE_V4I32(__addr__, __vec__) vst1q_s32( (int32_t*)(__addr__), (__vec__) ) |
Stores four 32-bit integer values. The address must be 16-byte aligned. |
|
#define | AKSIMD_STOREU_V4I32(__addr__, __vec__) vst1q_s32( (int32_t*)(__addr__), (__vec__) ) |
Stores four 32-bit integer values. The address does not need to be 16-byte aligned. |
|
#define | AKSIMD_STOREU_V4UI32(__addr__, __vec__) vst1q_u32( (uint32_t*)(__addr__), (__vec__) ) |
Stores four 32-bit unsigned integer values. The address does not need to be 16-byte aligned. |
|
#define | AKSIMD_STORE_V2F32(__addr__, __vName__) vst1_f32( (AkReal32*)(__addr__), (__vName__) ) |
Stores two single-precision, floating-point values. The address must be 16-byte aligned. |
|
#define | AKSIMD_STORE_V4F32X2(__addr__, __vName__) vst2q_f32( (float32_t*)(__addr__), (__vName__) ) |
Stores data by interleaving into memory. |
|
#define | AKSIMD_STORE_V2F32X2(__addr__, __vName__) vst2_f32( (float32_t*)(__addr__), (__vName__) ) |
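A short sketch of the store macros; afOut is a placeholder buffer, and 16-byte alignment is required only for the aligned variant:

    alignas(16) AkReal32 afOut[4];
    AKSIMD_V4F32 vHalf = AKSIMD_SET_V4F32( 0.5f );
    AKSIMD_STORE_V4F32( afOut, vHalf );    // aligned store: afOut == { 0.5f, 0.5f, 0.5f, 0.5f }
    AKSIMD_STOREU_V4F32( afOut, vHalf );   // unaligned variant, no alignment requirement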
AKSIMD conversion | |
#define | AKSIMD_CONVERT_V4I32_TO_V4F32(__vec__) vcvtq_f32_s32( __vec__ ) |
#define | AKSIMD_CONVERT_V4F32_TO_V4I32(__vec__) vcvtq_s32_f32( __vec__ ) |
#define | AKSIMD_TRUNCATE_V4F32_TO_V4I32(__vec__) vcvtq_s32_f32( (__vec__) ) |
#define | AKSIMD_CONVERT_V2F32_TO_V2I32(__vec__) vcvt_s32_f32( __vec__ ) |
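A minimal conversion sketch, using placeholder vectors:

    AKSIMD_V4I32 vInt   = AKSIMD_SET_V4I32( 3 );
    AKSIMD_V4F32 vFloat = AKSIMD_CONVERT_V4I32_TO_V4F32( vInt );    // { 3.f, 3.f, 3.f, 3.f }
    AKSIMD_V4I32 vBack  = AKSIMD_TRUNCATE_V4F32_TO_V4I32( vFloat ); // truncates toward zero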
AKSIMD shifting | |
#define | AKSIMD_SHIFTLEFT_V4I32(__vec__, __shiftBy__) vshlq_n_s32( (__vec__), (__shiftBy__) ) |
#define | AKSIMD_SHIFTRIGHTARITH_V4I32(__vec__, __shiftBy__) vrshrq_n_s32( (__vec__), (__shiftBy__) ) |
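A short sketch with placeholder values; the shift count must be a compile-time constant because the macros expand to the _n_ forms of the NEON intrinsics:

    AKSIMD_V4I32 vVal     = AKSIMD_SET_V4I32( 100 );
    AKSIMD_V4I32 vShifted = AKSIMD_SHIFTLEFT_V4I32( vVal, 8 );           // each lane << 8
    AKSIMD_V4I32 vBackDn  = AKSIMD_SHIFTRIGHTARITH_V4I32( vShifted, 8 ); // arithmetic (rounding) shift right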
Typedefs | |
AKSIMD types | |
typedef int32x4_t | AKSIMD_V4I32 |
Vector of 4 32-bit signed integers. |
|
typedef int16x8_t | AKSIMD_V8I16 |
Vector of 8 16-bit signed integers. |
|
typedef int16x4_t | AKSIMD_V4I16 |
Vector of 4 16-bit signed integers. |
|
typedef uint32x4_t | AKSIMD_V4UI32 |
Vector of 4 32-bit unsigned integers. |
|
typedef uint32x2_t | AKSIMD_V2UI32 |
Vector of 2 32-bit unsigned integers. |
|
typedef int32x2_t | AKSIMD_V2I32 |
Vector of 2 32-bit signed integers. |
|
typedef float32_t | AKSIMD_F32 |
32-bit float |
|
typedef float32x2_t | AKSIMD_V2F32 |
Vector of 2 32-bit floats. |
|
typedef float32x4_t | AKSIMD_V4F32 |
Vector of 4 32-bit floats. |
|
typedef uint32x4_t | AKSIMD_V4COND |
Vector of 4 comparison results. |
|
typedef uint32x4_t | AKSIMD_V4ICOND |
Vector of 4 comparison results. |
|
typedef uint32x4_t | AKSIMD_V4FCOND |
Vector of 4 comparison results. |
|
typedef float32x2x2_t | AKSIMD_V2F32X2 |
typedef float32x4x2_t | AKSIMD_V4F32X2 |
typedef float32x4x4_t | AKSIMD_V4F32X4 |
AKSIMD logical operations | |
| |
#define | AKSIMD_AND_V4I32(__a__, __b__) vandq_s32( (__a__), (__b__) ) |
#define | AKSIMD_CMPGT_V8I16(__a__, __b__) vreinterpretq_s32_u16( vcgtq_s16( vreinterpretq_s16_s32(__a__), vreinterpretq_s16_s32(__b__) ) ) |
#define | AKSIMD_CMPLE_V4F32(__a__, __b__) vcleq_f32( (__a__), (__b__) ) |
Compares for less than or equal (see _mm_cmple_ps). |
|
#define | AKSIMD_CMPLT_V4I32(__a__, __b__) vreinterpretq_s32_u32(vcltq_s32(__a__, __b__)) |
#define | AKSIMD_CMPGT_V4I32(__a__, __b__) vreinterpretq_s32_u32(vcgtq_s32(__a__,__b__)) |
#define | AKSIMD_XOR_V4I32(__a__, __b__) veorq_s32(__a__, __b__) |
#define | AKSIMD_SUB_V4I32(__a__, __b__) vsubq_s32(__a__, __b__) |
static AkForceInline AKSIMD_V4F32 | AKSIMD_XOR_V4F32 (const AKSIMD_V4F32 &in_vec0, const AKSIMD_V4F32 &in_vec1) |
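A brief sketch of the bitwise helpers with placeholder values; XOR-ing a float vector with itself clears every lane to +0.f:

    AKSIMD_V4I32 vBits = AKSIMD_SET_V4I32( 0x0F );
    AKSIMD_V4I32 vMask = AKSIMD_SET_V4I32( 0xFF );
    AKSIMD_V4I32 vAnd  = AKSIMD_AND_V4I32( vBits, vMask );  // { 0x0F, 0x0F, 0x0F, 0x0F }
    AKSIMD_V4F32 vF    = AKSIMD_SET_V4F32( 2.f );
    AKSIMD_V4F32 vZero = AKSIMD_XOR_V4F32( vF, vF );        // all lanes become 0.f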
AKSIMD shuffling | |
| |
#define | AKSIMD_COMBINE_V2F32(a, b) vcombine_f32( a, b ) |
#define | AKSIMD_SHUFFLE(fp3, fp2, fp1, fp0) (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0))) |
#define | AKSIMD_SHUFFLE_V4F32(a, b, zyxw) _AKSIMD_LOCAL::SHUFFLE_V4F32< zyxw >( a, b ) |
#define | AKSIMD_SHUFFLE_BCDA(__a__) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), AKSIMD_SHUFFLE(0,3,2,1)) |
Barrel-shift all floats by one. |
|
#define | AKSIMD_SHUFFLE_BADC(__a__) vrev64q_f32( __a__ ) |
Swap the 2 lower floats together and the 2 higher floats together. |
|
#define | AKSIMD_SHUFFLE_CDAB(__a__) vcombine_f32( vget_high_f32(__a__), vget_low_f32(__a__) ) |
Swap the 2 lower floats with the 2 higher floats. |
|
#define | AKSIMD_DUP_ODD(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(3,3,1,1)) |
Duplicates the odd items into the even items (d c b a -> d d b b). |
|
#define | AKSIMD_DUP_EVEN(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(2,2,0,0)) |
Duplicates the even items into the odd items (d c b a -> c c a a). |
|
AKSIMD_V4F32 | AKSIMD_MOVEHL_V4F32 (const AKSIMD_V4F32 abcd, const AKSIMD_V4F32 xyzw) |
AKSIMD_V4F32 | AKSIMD_MOVELH_V4F32 (const AKSIMD_V4F32 &xyzw, const AKSIMD_V4F32 &abcd) |
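A usage sketch with placeholder data; AKSIMD_SHUFFLE takes lane indices from highest to lowest, in the same order as _MM_SHUFFLE on SSE:

    alignas(16) AkReal32 afSrc[4] = { 1.f, 2.f, 3.f, 4.f };          // lanes a b c d
    AKSIMD_V4F32 vSrc = AKSIMD_LOAD_V4F32( afSrc );
    AKSIMD_V4F32 vRot = AKSIMD_SHUFFLE_BCDA( vSrc );                 // { 2.f, 3.f, 4.f, 1.f }
    AKSIMD_V4F32 vOdd = AKSIMD_SHUFFLE_V4F32( vSrc, vSrc, AKSIMD_SHUFFLE(3,3,1,1) ); // { 2.f, 2.f, 4.f, 4.f }, same as AKSIMD_DUP_ODD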
AKSIMD arithmetic | |
| |
#define | AKSIMD_SUB_V4F32(__a__, __b__) vsubq_f32( (__a__), (__b__) ) |
#define | AKSIMD_SUB_V2F32(__a__, __b__) vsub_f32( (__a__), (__b__) ) |
#define | AKSIMD_SUB_SS_V4F32(__a__, __b__) vsubq_f32( (__a__), vsetq_lane_f32( AKSIMD_GETELEMENT_V4F32( (__b__), 0 ), AKSIMD_SETZERO_V4F32(), 0 ) ); |
#define | AKSIMD_ADD_V4F32(__a__, __b__) vaddq_f32( (__a__), (__b__) ) |
#define | AKSIMD_ADD_V2F32(__a__, __b__) vadd_f32( (__a__), (__b__) ) |
#define | AKSIMD_ADD_V4I32(__a__, __b__) vaddq_s32( (__a__), (__b__) ) |
Adds the four integers of a and b. |
|
#define | AKSIMD_MULLO16_V4I32(__a__, __b__) vmulq_s32(__a__, __b__) |
Multiplies the four low parts of both operands and stores the results as four 32-bit integers (no overflow). |
|
#define | AKSIMD_COMP_V4F32(__a__, __b__) vceqq_f32( (__a__), (__b__) ) |
#define | AKSIMD_COMP_V2F32(__a__, __b__) vceq_f32( (__a__), (__b__) ) |
#define | AKSIMD_ADD_SS_V4F32(__a__, __b__) vaddq_f32( (__a__), vsetq_lane_f32( AKSIMD_GETELEMENT_V4F32( (__b__), 0 ), AKSIMD_SETZERO_V4F32(), 0 ) ) |
#define | AKSIMD_MUL_V4F32(__a__, __b__) vmulq_f32( (__a__), (__b__) ) |
#define | AKSIMD_MUL_V4F32_SCALAR(__a__, __b__) vmulq_n_f32( (__a__), (__b__) ) |
#define | AKSIMD_MUL_V2F32(__a__, __b__) vmul_f32( (__a__), (__b__) ) |
#define | AKSIMD_MUL_V2F32_SCALAR(__a__, __b__) vmul_n_f32( (__a__), (__b__) ) |
#define | AKSIMD_MUL_SS_V4F32(__a__, __b__) vmulq_f32( (__a__), vsetq_lane_f32( AKSIMD_GETELEMENT_V4F32( (__b__), 0 ), AKSIMD_SETZERO_V4F32(), 0 ) ) |
#define | AKSIMD_MADD_V4F32(__a__, __b__, __c__) AKSIMD_ADD_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) ) |
Vector multiply-add operation. |
|
#define | AKSIMD_MSUB_V4F32(__a__, __b__, __c__) AKSIMD_SUB_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) ) |
#define | AKSIMD_MADD_V2F32(__a__, __b__, __c__) AKSIMD_ADD_V2F32( AKSIMD_MUL_V2F32( (__a__), (__b__) ), (__c__) ) |
#define | AKSIMD_MSUB_V2F32(__a__, __b__, __c__) AKSIMD_SUB_V2F32( AKSIMD_MUL_V2F32( (__a__), (__b__) ), (__c__) ) |
#define | AKSIMD_MADD_V4F32_INST(__a__, __b__, __c__) vmlaq_f32( (__c__), (__a__), (__b__) ) |
#define | AKSIMD_MADD_V2F32_INST(__a__, __b__, __c__) vmla_f32( (__c__), (__a__), (__b__) ) |
#define | AKSIMD_MADD_V4F32_SCALAR(__a__, __b__, __c__) vmlaq_n_f32( (__c__), (__a__), (__b__) ) |
#define | AKSIMD_MADD_V2F32_SCALAR(__a__, __b__, __c__) vmla_n_f32( (__c__), (__a__), (__b__) ) |
#define | AKSIMD_MIN_V4F32(__a__, __b__) vminq_f32( (__a__), (__b__) ) |
#define | AKSIMD_MIN_V2F32(__a__, __b__) vmin_f32( (__a__), (__b__) ) |
#define | AKSIMD_MAX_V4F32(__a__, __b__) vmaxq_f32( (__a__), (__b__) ) |
#define | AKSIMD_MAX_V2F32(__a__, __b__) vmax_f32( (__a__), (__b__) ) |
#define | AKSIMD_ABS_V4F32(__a__) vabsq_f32((__a__)) |
Returns absolute value. |
|
#define | AKSIMD_NEG_V2F32(__a__) vneg_f32( (__a__) ) |
Changes the sign. |
|
#define | AKSIMD_NEG_V4F32(__a__) vnegq_f32( (__a__) ) |
#define | AKSIMD_SQRT_V4F32(__vec__) vrecpeq_f32( vrsqrteq_f32( __vec__ ) ) |
Square root (4 floats). |
|
#define | AKSIMD_SQRT_V2F32(__vec__) vrecpe_f32( vrsqrte_f32( __vec__ ) ) |
Square root (2 floats). |
|
AkForceInline AKSIMD_V4F32 | AKSIMD_DIV_V4F32 (AKSIMD_V4F32 a, AKSIMD_V4F32 b) |
Rough estimation of division. |
|
AkForceInline AKSIMD_V4F32 | AKSIMD_MADD_SS_V4F32 (const AKSIMD_V4F32 &__a__, const AKSIMD_V4F32 &__b__, const AKSIMD_V4F32 &__c__) |
Vector multiply-add operation. |
|
static AkForceInline void | AKSIMD_HORIZONTALADD (AKSIMD_V4F32 &vVec) |
static AkForceInline AKSIMD_V4F32 | AKSIMD_COMPLEXMUL (AKSIMD_V4F32 vCIn1, AKSIMD_V4F32 vCIn2) |
Cross-platform SIMD multiplication of 2 complex data elements with interleaved real and imaginary parts. |
|
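A minimal arithmetic sketch with placeholder values; the MADD macro expands to a separate multiply and add (no fused hardware instruction is implied), and AKSIMD_COMPLEXMUL is shown here assuming a { re, im, re, im } interleave per vector:

    AKSIMD_V4F32 vSig  = AKSIMD_SET_V4F32( 0.25f );
    AKSIMD_V4F32 vGain = AKSIMD_SET_V4F32( 2.0f );
    AKSIMD_V4F32 vBias = AKSIMD_SET_V4F32( 1.0f );
    AKSIMD_V4F32 vOut  = AKSIMD_MADD_V4F32( vSig, vGain, vBias );   // 0.25*2.0 + 1.0 == 1.5 in every lane

    // Two complex numbers per vector, laid out as { re0, im0, re1, im1 } (assumed layout):
    alignas(16) AkReal32 afC1[4] = { 1.f, 2.f, 3.f, 4.f };
    alignas(16) AkReal32 afC2[4] = { 5.f, 6.f, 7.f, 8.f };
    AKSIMD_V4F32 vProd = AKSIMD_COMPLEXMUL( AKSIMD_LOAD_V4F32( afC1 ), AKSIMD_LOAD_V4F32( afC2 ) );
    // (1+2i)(5+6i) = -7+16i and (3+4i)(7+8i) = -11+52i  ->  { -7.f, 16.f, -11.f, 52.f }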
AKSIMD packing / unpacking | |
| |
#define | AKSIMD_UNPACKLO_VECTOR8I16(__a__, __b__) vreinterpretq_s32_s16( vzipq_s16( vreinterpretq_s16_s32(__a__), vreinterpretq_s16_s32(__b__) ).val[0] ) |
#define | AKSIMD_UNPACKHI_VECTOR8I16(__a__, __b__) vreinterpretq_s32_s16( vzipq_s16( vreinterpretq_s16_s32(__a__), vreinterpretq_s16_s32(__b__) ).val[1] ) |
#define | AKSIMD_HILO_V2F32(in_vec1, in_vec2) vreinterpret_f32_u32( vext_u32( vreinterpret_u32_f32( in_vec1 ), vreinterpret_u32_f32( in_vec2 ), 1 ) ) |
#define | AKSIMD_TRANSPOSE_V2F32(in_vec1, in_vec2) vtrn_f32( in_vec1, in_vec2 ) |
#define | AKSIMD_TRANSPOSE_V4F32(in_vec1, in_vec2) vtrnq_f32( in_vec1, in_vec2 ) |
#define | AKSIMD_SWAP_V2F32(in_vec) vrev64_f32( in_vec ) |
V1 = {a,b} => VR = {b,a}. |
|
AkForceInline AKSIMD_V4F32 | AKSIMD_UNPACKLO_V4F32 (const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2) |
AkForceInline AKSIMD_V4F32 | AKSIMD_UNPACKHI_V4F32 (const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2) |
AkForceInline AKSIMD_V4I32 | AKSIMD_PACKS_V4I32 (const AKSIMD_V4I32 &in_vec1, const AKSIMD_V4I32 &in_vec2) |
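A short sketch of the unpack helpers, using placeholder data:

    alignas(16) AkReal32 afA[4] = { 1.f, 2.f, 3.f, 4.f };
    alignas(16) AkReal32 afB[4] = { 5.f, 6.f, 7.f, 8.f };
    AKSIMD_V4F32 vA  = AKSIMD_LOAD_V4F32( afA );
    AKSIMD_V4F32 vB  = AKSIMD_LOAD_V4F32( afB );
    AKSIMD_V4F32 vLo = AKSIMD_UNPACKLO_V4F32( vA, vB );  // { 1.f, 5.f, 2.f, 6.f }
    AKSIMD_V4F32 vHi = AKSIMD_UNPACKHI_V4F32( vA, vB );  // { 3.f, 7.f, 4.f, 8.f }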
AKSIMD vector comparison | |
Apart from AKSIMD_SEL_GTEQ_V4F32, these implementations are limited to a few platforms. | |
#define | AKSIMD_CMP_CTRLMASK uint32x4_t |
#define | AKSIMD_GTEQ_V4F32(__a__, __b__) vcgeq_f32( (__a__), (__b__)) |
Compare each float element and return control mask. |
|
#define | AKSIMD_GTEQ_V4I32(__a__, __b__) vcgeq_s32( (__a__), (__b__)) |
Compare each integer element and return control mask. |
|
#define | AKSIMD_EQ_V4F32(__a__, __b__) vceqq_f32( (__a__), (__b__)) |
Compare each float element and return control mask. |
|
#define | AKSIMD_EQ_V4I32(__a__, __b__) vceqq_s32( (__a__), (__b__)) |
Compare each integer element and return control mask. |
|
#define | AKSIMD_VSEL_V4F32(__a__, __b__, __c__) vbslq_f32( (__c__), (__b__), (__a__) ) |
Returns a where the control mask is 0 and b where it is non-zero; the control mask is in c and is usually produced by the comparison operations above. |
|
#define | AKSIMD_SEL_GTEQ_V4F32(__a__, __b__, __cond1__, __cond2__) AKSIMD_VSEL_V4F32( __a__, __b__, AKSIMD_GTEQ_V4F32( __cond1__, __cond2__ ) ) |
#define | AKSIMD_SEL_GTEZ_V4F32(__a__, __b__, __c__) AKSIMD_VSEL_V4F32( (__c__), (__b__), AKSIMD_GTEQ_V4F32( __a__, AKSIMD_SETZERO_V4F32() ) ) |
#define | AKSIMD_SPLAT_V4F32(var, idx) vmovq_n_f32(vgetq_lane_f32(var, idx)) |
static AkForceInline int | AKSIMD_MASK_V4F32 (const AKSIMD_V4F32 &in_vec1) |
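A brief selection sketch with placeholder values; each comparison yields an all-ones or all-zero 32-bit mask per lane, which then drives AKSIMD_VSEL_V4F32:

    AKSIMD_V4F32 vX   = AKSIMD_SET_V4F32( 0.5f );
    AKSIMD_V4F32 vY   = AKSIMD_SET_V4F32( 1.0f );
    AKSIMD_CMP_CTRLMASK vGe = AKSIMD_GTEQ_V4F32( vX, vY );   // mask is 0 in every lane (0.5 < 1.0)
    AKSIMD_V4F32 vSel = AKSIMD_VSEL_V4F32( vX, vY, vGe );    // keeps vX where the mask is 0 -> all 0.5f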
AKSIMD - arm_neon implementation
Definition in file AkSimd.h.