Go to the source code of this file.
Defines | |
Platform specific memory size alignment for allocation purposes | |
#define | AKSIMD_ALIGNSIZE(__Size__) (((__Size__) + 15) & ~15) |
AKSIMD loading / setting | |
#define | AKSIMD_LOAD_V4F32(__addr__) vld1q_f32( (float32_t*)(__addr__) ) |
Loads four single-precision, floating-point values (see _mm_load_ps). |
|
#define | AKSIMD_LOADU_V4F32(__addr__) vld1q_f32( (float32_t*)(__addr__) ) |
#define | AKSIMD_LOAD1_V4F32(__scalar__) vld1q_dup_f32( (float32_t*)(&(__scalar__)) ) |
#define | AKSIMD_SET_V4F32(__scalar__) vdupq_n_f32( __scalar__ ) |
#define | AKSIMD_SET_V4I32(__scalar__) vdupq_n_s32( __scalar__ ) |
Sets the four integer values to __scalar__. |
|
#define | AKSIMD_SETZERO_V4F32() AKSIMD_SET_V4F32( 0 ) |
#define | AKSIMD_LOAD_SS_V4F32(__addr__) vld1q_lane_f32( (float32_t*)(__addr__), AKSIMD_SETZERO_V4F32(), 0 ); |
#define | AKSIMD_LOAD_V4I32(__addr__) vld1q_s32( (const int32_t*)(__addr__) ) |
Loads four 32-bit signed integer values (aligned). |
|
#define | AKSIMD_LOAD_V8I16(__addr__) vld1q_s16( (const int16_t*)(__addr__) ) |
Loads 8 16-bit signed integer values (aligned). |
|
#define | AKSIMD_LOAD_V4I16(__addr__) vld1_s16( (const int16_t*)(__addr__) ) |
Loads 4 16-bit signed integer values (aligned). |
|
#define | AKSIMD_LOADU_V4I32(__addr__) *__addr__ |
Loads unaligned 128-bit value (see _mm_loadu_si128). |
|
#define | AKSIMD_SETZERO_V4I32() vdupq_n_s32( 0 ) |
Sets the four 32-bit integer values to zero (see _mm_setzero_si128). |
|
#define | AKSIMD_LOAD_V2F32(__addr__) vld1_f32( (float32_t*)(__addr__) ) |
Loads two single-precision, floating-point values. |
|
#define | AKSIMD_LOAD_V2F32_LANE(__addr__, __vec__, __lane__) vld1_lane_f32( (float32_t*)(__addr__), (__vec__), (__lane__) ); |
#define | AKSIMD_SET_V2F32(__scalar__) vdup_n_f32( __scalar__ ) |
Sets the two single-precision, floating-point values to __scalar__. |
|
#define | AKSIMD_SETZERO_V2F32() AKSIMD_SET_V2F32( 0 ) |
Sets the two single-precision, floating-point values to zero. |
|
#define | AKSIMD_LOAD_V4F32X2(__addr__) vld2q_f32( (float32_t*)(__addr__) ) |
Loads data from memory and de-interleaves. |
|
#define | AKSIMD_LOAD_V2F32X2(__addr__) vld2_f32( (float32_t*)(__addr__) ) |
#define | AKSIMD_LOAD_V2F32X2_LANE(__addr__, __vec__, __lane__) vld2_lane_f32( (float32_t*)(__addr__), (__vec__), (__lane__) ); |
Loads data from memory and de-interleaves; only selected lane. |
|
#define | AKSIMD_LOAD_V4F32X4_LANE(__addr__, __vec__, __lane__) vld4q_lane_f32( (float32_t*)(__addr__), (__vec__), (__lane__) ); |
AKSIMD storing | |
#define | AKSIMD_STORE_V4F32(__addr__, __vName__) vst1q_f32( (float32_t*)(__addr__), (__vName__) ) |
Stores four single-precision, floating-point values. The address must be 16-byte aligned. |
|
#define | AKSIMD_STOREU_V4F32(__addr__, __vec__) vst1q_f32( (float32_t*)(__addr__), (__vec__) ) |
Stores four single-precision, floating-point values. The address does not need to be 16-byte aligned. |
|
#define | AKSIMD_STORE1_V4F32(__addr__, __vec__) vst1q_lane_f32( (float32_t*)(__addr__), (__vec__), 0 ) |
#define | AKSIMD_STORE_V4I32(__addr__, __vec__) vst1q_s32( (int32_t*)(__addr__), (__vec__) ) |
Stores four 32-bit integer values. The address must be 16-byte aligned. |
|
#define | AKSIMD_STOREU_V4I32(__addr__, __vec__) vst1q_s32( (int32_t*)(__addr__), (__vec__) ) |
Stores four 32-bit integer values. The address does not need to be 16-byte aligned. |
|
#define | AKSIMD_STOREU_V4UI32(__addr__, __vec__) vst1q_u32( (uint32_t*)(__addr__), (__vec__) ) |
Stores four 32-bit unsigned integer values. The address does not need to be 16-byte aligned. |
|
#define | AKSIMD_STORE_V2F32(__addr__, __vName__) vst1_f32( (AkReal32*)(__addr__), (__vName__) ) |
Stores two single-precision, floating-point values. The address must be 16-byte aligned. |
|
#define | AKSIMD_STORE_V4F32X2(__addr__, __vName__) vst2q_f32( (float32_t*)(__addr__), (__vName__) ) |
Stores data by interleaving into memory. |
|
#define | AKSIMD_STORE_V2F32X2(__addr__, __vName__) vst2_f32( (float32_t*)(__addr__), (__vName__) ) |
AKSIMD conversion | |
#define | AKSIMD_CONVERT_V4I32_TO_V4F32(__vec__) vcvtq_f32_s32( __vec__ ) |
#define | AKSIMD_CONVERT_V4F32_TO_V4I32(__vec__) vcvtq_s32_f32( __vec__ ) |
#define | AKSIMD_TRUNCATE_V4F32_TO_V4I32(__vec__) vcvtq_s32_f32( (__vec__) ) |
#define | AKSIMD_CONVERT_V2F32_TO_V2I32(__vec__) vcvt_s32_f32( __vec__ ) |
AKSIMD logical operations | |
#define | AKSIMD_AND_V4I32(__a__, __b__) vandq_s32( (__a__), (__b__) ) |
#define | AKSIMD_CMPGT_V8I16(__a__, __b__) vreinterpretq_s32_u16( vcgtq_s16( vreinterpretq_s16_s32(__a__), vreinterpretq_s16_s32(__b__) ) ) |
#define | AKSIMD_CMPLE_V4F32(__a__, __b__) vcleq_f32( (__a__), (__b__) ) |
Compares for less than or equal (see _mm_cmple_ps). |
|
AKSIMD shifting | |
#define | AKSIMD_SHIFTLEFT_V4I32(__vec__, __shiftBy__) vshlq_n_s32( (__vec__), (__shiftBy__) ) |
#define | AKSIMD_SHIFTRIGHTARITH_V4I32(__vec__, __shiftBy__) vrshrq_n_s32( (__vec__), (__shiftBy__) ) |
AKSIMD vector comparison | |
#define | AKSIMD_CMP_CTRLMASK uint32x4_t |
#define | AKSIMD_GTEQ_V4F32(__a__, __b__) vcgeq_f32( (__a__), (__b__)) |
Compares each float element for greater-than-or-equal (a >= b) and returns a control mask. |
|
#define | AKSIMD_GTEQ_V4I32(__a__, __b__) vcgeq_s32( (__a__), (__b__)) |
Compares each integer element for greater-than-or-equal (a >= b) and returns a control mask. |
|
#define | AKSIMD_EQ_V4F32(__a__, __b__) vceqq_f32( (__a__), (__b__)) |
Compares each float element for equality (a == b) and returns a control mask. |
|
#define | AKSIMD_EQ_V4I32(__a__, __b__) vceqq_s32( (__a__), (__b__)) |
Compares each integer element for equality (a == b) and returns a control mask. |
|
#define | AKSIMD_VSEL_V4F32(__a__, __b__, __c__) vbslq_f32( (__c__), (__b__), (__a__) ) |
Returns a where the control mask is 0 and b where the control mask is non-zero. The control mask is passed in c and is usually produced by one of the comparison operations above. |
|
#define | AKSIMD_SEL_GTEQ_V4F32(__a__, __b__, __cond1__, __cond2__) AKSIMD_VSEL_V4F32( __a__, __b__, AKSIMD_GTEQ_V4F32( __cond1__, __cond2__ ) ) |
#define | AKSIMD_SEL_GTEZ_V4F32(__a__, __b__, __c__) AKSIMD_VSEL_V4F32( (__c__), (__b__), AKSIMD_GTEQ_V4F32( __a__, AKSIMD_SETZERO_V4F32() ) ) |
#define | AKSIMD_SPLAT_V4F32(var, idx) vmovq_n_f32(vgetq_lane_f32(var, idx)) |
AKSIMD types | |
| |
#define | AKSIMD_V4F32_SUPPORTED |
typedef int32x4_t | AKSIMD_V4I32 |
Vector of 4 32-bit signed integers. |
|
typedef int16x8_t | AKSIMD_V8I16 |
Vector of 8 16-bit signed integers. |
|
typedef int16x4_t | AKSIMD_V4I16 |
Vector of 4 16-bit signed integers. |
|
typedef uint32x4_t | AKSIMD_V4UI32 |
Vector of 4 32-bit unsigned integers. |
|
typedef uint32x2_t | AKSIMD_V2UI32 |
Vector of 2 32-bit unsigned integers. |
|
typedef int32x2_t | AKSIMD_V2I32 |
Vector of 2 32-bit signed integers. |
|
typedef float32_t | AKSIMD_F32 |
32-bit float |
|
typedef float32x2_t | AKSIMD_V2F32 |
Vector of 2 32-bit floats. |
|
typedef float32x4_t | AKSIMD_V4F32 |
Vector of 4 32-bit floats. |
|
typedef uint32x4_t | AKSIMD_V4COND |
Vector of 4 comparison results. |
|
typedef uint32x4_t | AKSIMD_V4ICOND |
Vector of 4 comparison results. |
|
typedef uint32x4_t | AKSIMD_V4FCOND |
Vector of 4 comparison results. |
|
typedef float32x2x2_t | AKSIMD_V2F32X2 |
typedef float32x4x2_t | AKSIMD_V4F32X2 |
typedef float32x4x4_t | AKSIMD_V4F32X4 |
AKSIMD shuffling | |
| |
#define | AKSIMD_COMBINE_V2F32(a, b) vcombine_f32( a, b ) |
#define | AKSIMD_SHUFFLE(fp3, fp2, fp1, fp0) (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0))) |
#define | AKSIMD_SHUFFLE_V4F32(a, b, zyxw) _AKSIMD_LOCAL::SHUFFLE_V4F32< zyxw >( a, b ) |
#define | AKSIMD_SHUFFLE_BADC(__a__) vrev64q_f32( __a__ ) |
Swaps the 2 lower floats with each other and the 2 higher floats with each other (d c b a -> c d a b). |
|
#define | AKSIMD_SHUFFLE_CDAB(__a__) vcombine_f32( vget_high_f32(__a__), vget_low_f32(__a__) ) |
Swap the 2 lower floats with the 2 higher floats. |
|
#define | AKSIMD_DUP_ODD(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(3,3,1,1)) |
Duplicates the odd items into the even items (d c b a -> d d b b ). |
|
#define | AKSIMD_DUP_EVEN(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(2,2,0,0)) |
Duplicates the even items into the odd items (d c b a -> c c a a ). |
|
AKSIMD_V4F32 | AKSIMD_MOVEHL_V4F32 (const AKSIMD_V4F32 abcd, const AKSIMD_V4F32 xyzw) |
AKSIMD_V4F32 | AKSIMD_MOVELH_V4F32 (const AKSIMD_V4F32 &xyzw, const AKSIMD_V4F32 &abcd) |
AKSIMD arithmetic | |
| |
#define | AKSIMD_SUB_V4F32(__a__, __b__) vsubq_f32( (__a__), (__b__) ) |
#define | AKSIMD_SUB_V2F32(__a__, __b__) vsub_f32( (__a__), (__b__) ) |
#define | AKSIMD_SUB_SS_V4F32(__a__, __b__) vsubq_f32( (__a__), vsetq_lane_f32( AKSIMD_GETELEMENT_V4F32( (__b__), 0 ), AKSIMD_SETZERO_V4F32(), 0 ) ); |
#define | AKSIMD_ADD_V4F32(__a__, __b__) vaddq_f32( (__a__), (__b__) ) |
#define | AKSIMD_ADD_V2F32(__a__, __b__) vadd_f32( (__a__), (__b__) ) |
#define | AKSIMD_ADD_V4I32(__a__, __b__) vaddq_s32( (__a__), (__b__) ) |
Adds the four integers of a and b. |
|
#define | AKSIMD_COMP_V4F32(__a__, __b__) vceqq_f32( (__a__), (__b__) ) |
#define | AKSIMD_COMP_V2F32(__a__, __b__) vceq_f32( (__a__), (__b__) ) |
#define | AKSIMD_ADD_SS_V4F32(__a__, __b__) vaddq_f32( (__a__), vsetq_lane_f32( AKSIMD_GETELEMENT_V4F32( (__b__), 0 ), AKSIMD_SETZERO_V4F32(), 0 ) ) |
#define | AKSIMD_MUL_V4F32(__a__, __b__) vmulq_f32( (__a__), (__b__) ) |
#define | AKSIMD_MUL_V4F32_SCALAR(__a__, __b__) vmulq_n_f32( (__a__), (__b__) ) |
#define | AKSIMD_MUL_V2F32(__a__, __b__) vmul_f32( (__a__), (__b__) ) |
#define | AKSIMD_MUL_V2F32_SCALAR(__a__, __b__) vmul_n_f32( (__a__), (__b__) ) |
#define | AKSIMD_MUL_SS_V4F32(__a__, __b__) vmulq_f32( (__a__), vsetq_lane_f32( AKSIMD_GETELEMENT_V4F32( (__b__), 0 ), AKSIMD_SETZERO_V4F32(), 0 ) ) |
#define | AKSIMD_MADD_V4F32(__a__, __b__, __c__) AKSIMD_ADD_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) ) |
Vector multiply-add operation. |
|
#define | AKSIMD_MSUB_V4F32(__a__, __b__, __c__) AKSIMD_SUB_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) ) |
#define | AKSIMD_MADD_V2F32(__a__, __b__, __c__) AKSIMD_ADD_V2F32( AKSIMD_MUL_V2F32( (__a__), (__b__) ), (__c__) ) |
#define | AKSIMD_MSUB_V2F32(__a__, __b__, __c__) AKSIMD_SUB_V2F32( AKSIMD_MUL_V2F32( (__a__), (__b__) ), (__c__) ) |
#define | AKSIMD_MADD_V4F32_INST(__a__, __b__, __c__) vmlaq_f32( (__c__), (__a__), (__b__) ) |
#define | AKSIMD_MADD_V2F32_INST(__a__, __b__, __c__) vmla_f32( (__c__), (__a__), (__b__) ) |
#define | AKSIMD_MADD_V4F32_SCALAR(__a__, __b__, __c__) vmlaq_n_f32( (__c__), (__a__), (__b__) ) |
#define | AKSIMD_MADD_V2F32_SCALAR(__a__, __b__, __c__) vmla_n_f32( (__c__), (__a__), (__b__) ) |
#define | AKSIMD_MIN_V4F32(__a__, __b__) vminq_f32( (__a__), (__b__) ) |
#define | AKSIMD_MIN_V2F32(__a__, __b__) vmin_f32( (__a__), (__b__) ) |
#define | AKSIMD_MAX_V4F32(__a__, __b__) vmaxq_f32( (__a__), (__b__) ) |
#define | AKSIMD_MAX_V2F32(__a__, __b__) vmax_f32( (__a__), (__b__) ) |
#define | AKSIMD_ABS_V4F32(__a__) vabsq_f32((__a__)) |
Returns absolute value. |
|
#define | AKSIMD_NEG_V2F32(__a__) vneg_f32( (__a__) ) |
Changes the sign. |
|
#define | AKSIMD_NEG_V4F32(__a__) vnegq_f32( (__a__) ) |
#define | AKSIMD_SQRT_V4F32(__vec__) vrecpeq_f32( vrsqrteq_f32( __vec__ ) ) |
Approximate square root (4 floats), computed as the reciprocal estimate of the reciprocal square-root estimate. |
|
#define | AKSIMD_SQRT_V2F32(__vec__) vrecpe_f32( vrsqrte_f32( __vec__ ) ) |
Approximate square root (2 floats), computed as the reciprocal estimate of the reciprocal square-root estimate. |
|
AkForceInline AKSIMD_V4F32 | AKSIMD_DIV_V4F32 (AKSIMD_V4F32 a, AKSIMD_V4F32 b) |
Rough estimation of division. |
|
AkForceInline AKSIMD_V4F32 | AKSIMD_MADD_SS_V4F32 (const AKSIMD_V4F32 &__a__, const AKSIMD_V4F32 &__b__, const AKSIMD_V4F32 &__c__) |
Vector multiply-add operation. |
|
static AkForceInline void | AKSIMD_HORIZONTALADD (AKSIMD_V4F32 &vVec) |
static AkForceInline AKSIMD_V4F32 | AKSIMD_COMPLEXMUL (AKSIMD_V4F32 vCIn1, AKSIMD_V4F32 vCIn2) |
Cross-platform SIMD multiplication of 2 complex data elements with interleaved real and imaginary parts. |
|
AKSIMD packing / unpacking | |
| |
#define | AKSIMD_UNPACKLO_VECTOR8I16(__a__, __b__) vreinterpretq_s32_s16( vzipq_s16( vreinterpretq_s16_s32(__a__), vreinterpretq_s16_s32(__b__) ).val[0] ) |
#define | AKSIMD_UNPACKHI_VECTOR8I16(__a__, __b__) vreinterpretq_s32_s16( vzipq_s16( vreinterpretq_s16_s32(__a__), vreinterpretq_s16_s32(__b__) ).val[1] ) |
#define | AKSIMD_HILO_V2F32(in_vec1, in_vec2) vreinterpret_f32_u32( vext_u32( vreinterpret_u32_f32( in_vec1 ), vreinterpret_u32_f32( in_vec2 ), 1 ) ) |
#define | AKSIMD_TRANSPOSE_V2F32(in_vec1, in_vec2) vtrn_f32( in_vec1, in_vec2 ) |
#define | AKSIMD_TRANSPOSE_V4F32(in_vec1, in_vec2) vtrnq_f32( in_vec1, in_vec2 ) |
#define | AKSIMD_SWAP_V2F32(in_vec) vrev64_f32( in_vec ) |
V1 = {a,b} => VR = {b,a}. |
|
AkForceInline AKSIMD_V4F32 | AKSIMD_UNPACKLO_V4F32 (const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2) |
AkForceInline AKSIMD_V4F32 | AKSIMD_UNPACKHI_V4F32 (const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2) |
AkForceInline AKSIMD_V4I32 | AKSIMD_PACKS_V4I32 (const AKSIMD_V4I32 &in_vec1, const AKSIMD_V4I32 &in_vec2) |
AKSIMD - arm_neon implementation
Definition in file AkSimd.h.
Questions? Problems? Need more info? Contact us, and we can help!
Visit our Support page. Register your project and we'll help you get started with no strings attached!
Get started with Wwise