include/AK/SoundEngine/Platforms/arm_neon/AkSimd.h File Reference

Go to the source code of this file.

Defines
Platform specific memory size alignment for allocation purposes

#define	AKSIMD_ALIGNSIZE(__Size__) (((__Size__) + 15) & ~15)
AKSIMD loading / setting

#define	AKSIMD_LOAD_V4F32(__addr__) vld1q_f32( (float32_t*)(__addr__) )
	Loads four single-precision, floating-point values (see _mm_load_ps).
#define	AKSIMD_LOADU_V4F32(__addr__) vld1q_f32( (float32_t*)(__addr__) )
#define	AKSIMD_LOAD1_V4F32(__scalar__) vld1q_dup_f32( (float32_t*)(&(__scalar__)) )
#define	AKSIMD_SET_V4F32(__scalar__) vdupq_n_f32( __scalar__ )
#define	AKSIMD_SET_V4I32(__scalar__) vdupq_n_s32( __scalar__ )
	Sets the four integer values to __scalar__.
#define	AKSIMD_SETZERO_V4F32() AKSIMD_SET_V4F32( 0 )
#define	AKSIMD_LOAD_SS_V4F32(__addr__) vld1q_lane_f32( (float32_t*)(__addr__), AKSIMD_SETZERO_V4F32(), 0 );
#define	AKSIMD_LOAD_V4I32(__addr__) vld1q_s32( (const int32_t*)(__addr__) )
	Loads four 32-bit signed integer values (aligned).
#define	AKSIMD_LOAD_V8I16(__addr__) vld1q_s16( (const int16_t*)(__addr__) )
	Loads 8 16-bit signed integer values (aligned).
#define	AKSIMD_LOAD_V4I16(__addr__) vld1_s16( (const int16_t*)(__addr__) )
	Loads 4 16-bit signed integer values (aligned).
#define	AKSIMD_LOADU_V4I32(__addr__) *__addr__
	Loads unaligned 128-bit value (see _mm_loadu_si128).
#define	AKSIMD_SETZERO_V4I32() vdupq_n_s32( 0 )
	Sets the four 32-bit integer values to zero (see _mm_setzero_si128).
#define	AKSIMD_LOAD_V2F32(__addr__) vld1_f32( (float32_t*)(__addr__) )
	Loads two single-precision, floating-point values.
#define	AKSIMD_LOAD_V2F32_LANE(__addr__, __vec__, __lane__) vld1_lane_f32( (float32_t*)(__addr__), (__vec__), (__lane__) );
#define	AKSIMD_SET_V2F32(__scalar__) vdup_n_f32( __scalar__ )
	Sets the two single-precision, floating-point values to __scalar__.
#define	AKSIMD_SETZERO_V2F32() AKSIMD_SET_V2F32( 0 )
	Sets the two single-precision, floating-point values to zero.
#define	AKSIMD_LOAD_V4F32X2(__addr__) vld2q_f32( (float32_t*)(__addr__) )
	Loads data from memory and de-interleaves.
#define	AKSIMD_LOAD_V2F32X2(__addr__) vld2_f32( (float32_t*)(__addr__) )
#define	AKSIMD_LOAD_V2F32X2_LANE(__addr__, __vec__, __lane__) vld2_lane_f32( (float32_t*)(__addr__), (__vec__), (__lane__) );
	Loads data from memory and de-interleaves; only selected lane.
#define	AKSIMD_LOAD_V4F32X4_LANE(__addr__, __vec__, __lane__) vld4q_lane_f32( (float32_t*)(__addr__), (__vec__), (__lane__) );
AKSIMD storing

#define	AKSIMD_STORE_V4F32(__addr__, __vName__) vst1q_f32( (float32_t*)(__addr__), (__vName__) )
	Stores four single-precision, floating-point values. The address must be 16-byte aligned.
#define	AKSIMD_STOREU_V4F32(__addr__, __vec__) vst1q_f32( (float32_t*)(__addr__), (__vec__) )
	Stores four single-precision, floating-point values. The address does not need to be 16-byte aligned.
#define	AKSIMD_STORE1_V4F32(__addr__, __vec__) vst1q_lane_f32( (float32_t*)(__addr__), (__vec__), 0 )
#define	AKSIMD_STORE_V4I32(__addr__, __vec__) vst1q_s32( (int32_t*)(__addr__), (__vec__) )
	Stores four 32-bit integer values. The address must be 16-byte aligned.
#define	AKSIMD_STOREU_V4I32(__addr__, __vec__) vst1q_s32( (int32_t*)(__addr__), (__vec__) )
	Stores four 32-bit integer values. The address does not need to be 16-byte aligned.
#define	AKSIMD_STOREU_V4UI32(__addr__, __vec__) vst1q_u32( (uint32_t*)(__addr__), (__vec__) )
	Stores four 32-bit unsigned integer values. The address does not need to be 16-byte aligned.
#define	AKSIMD_STORE_V2F32(__addr__, __vName__) vst1_f32( (AkReal32*)(__addr__), (__vName__) )
	Stores two single-precision, floating-point values. The address must be 16-byte aligned.
#define	AKSIMD_STORE_V4F32X2(__addr__, __vName__) vst2q_f32( (float32_t*)(__addr__), (__vName__) )
	Stores data by interleaving into memory.
#define	AKSIMD_STORE_V2F32X2(__addr__, __vName__) vst2_f32( (float32_t*)(__addr__), (__vName__) )
AKSIMD conversion

#define	AKSIMD_CONVERT_V4I32_TO_V4F32(__vec__) vcvtq_f32_s32( __vec__ )
#define	AKSIMD_CONVERT_V4F32_TO_V4I32(__vec__) vcvtq_s32_f32( __vec__ )
#define	AKSIMD_TRUNCATE_V4F32_TO_V4I32(__vec__) vcvtq_s32_f32( (__vec__) )
#define	AKSIMD_CONVERT_V2F32_TO_V2I32(__vec__) vcvt_s32_f32( __vec__ )
AKSIMD logical operations

#define	AKSIMD_AND_V4I32(__a__, __b__) vandq_s32( (__a__), (__b__) )
#define	AKSIMD_CMPGT_V8I16(__a__, __b__) vreinterpretq_s32_u16( vcgtq_s16( vreinterpretq_s16_s32(__a__), vreinterpretq_s16_s32(__b__) ) )
#define	AKSIMD_CMPLE_V4F32(__a__, __b__) vcleq_f32( (__a__), (__b__) )
	Compares for less than or equal (see _mm_cmple_ps).
AKSIMD shifting

#define	AKSIMD_SHIFTLEFT_V4I32(__vec__, __shiftBy__) vshlq_n_s32( (__vec__), (__shiftBy__) )
#define	AKSIMD_SHIFTRIGHTARITH_V4I32(__vec__, __shiftBy__) vrshrq_n_s32( (__vec__), (__shiftBy__) )
AKSIMD vector comparison
Apart from AKSIMD_SEL_GTEQ_V4F32, these implementations are limited to a few platforms.
#define	AKSIMD_CMP_CTRLMASK uint32x4_t
#define	AKSIMD_GTEQ_V4F32(__a__, __b__) vcgeq_f32( (__a__), (__b__))
	Compare each float element and return control mask.
#define	AKSIMD_GTEQ_V4I32(__a__, __b__) vcgeq_s32( (__a__), (__b__))
	Compare each integer element and return control mask.
#define	AKSIMD_EQ_V4F32(__a__, __b__) vceqq_f32( (__a__), (__b__))
	Compare each float element and return control mask.
#define	AKSIMD_EQ_V4I32(__a__, __b__) vceqq_s32( (__a__), (__b__))
	Compare each integer element and return control mask.
#define	AKSIMD_VSEL_V4F32(__a__, __b__, __c__) vbslq_f32( (__c__), (__b__), (__a__) )
	Returns a where the control mask is zero and b where the control mask is non-zero. The control mask is passed in c and is usually produced by the comparison operations above.
#define	AKSIMD_SEL_GTEQ_V4F32(__a__, __b__, __cond1__, __cond2__) AKSIMD_VSEL_V4F32( __a__, __b__, AKSIMD_GTEQ_V4F32( __cond1__, __cond2__ ) )
#define	AKSIMD_SEL_GTEZ_V4F32(__a__, __b__, __c__) AKSIMD_VSEL_V4F32( (__c__), (__b__), AKSIMD_GTEQ_V4F32( __a__, AKSIMD_SETZERO_V4F32() ) )
#define	AKSIMD_SPLAT_V4F32(var, idx) vmovq_n_f32(vgetq_lane_f32(var, idx))
Typedefs
AKSIMD types

typedef int32x4_t	AKSIMD_V4I32
	Vector of 4 32-bit signed integers.
typedef int16x8_t	AKSIMD_V8I16
	Vector of 8 16-bit signed integers.
typedef int16x4_t	AKSIMD_V4I16
	Vector of 4 16-bit signed integers.
typedef uint32x4_t	AKSIMD_V4UI32
	Vector of 4 32-bit unsigned integers.
typedef uint32x2_t	AKSIMD_V2UI32
	Vector of 2 32-bit unsigned integers.
typedef int32x2_t	AKSIMD_V2I32
	Vector of 2 32-bit signed integers.
typedef float32_t	AKSIMD_F32
	32-bit float
typedef float32x2_t	AKSIMD_V2F32
	Vector of 2 32-bit floats.
typedef float32x4_t	AKSIMD_V4F32
	Vector of 4 32-bit floats.
typedef uint32x4_t	AKSIMD_V4COND
	Vector of 4 comparison results.
typedef uint32x4_t	AKSIMD_V4ICOND
	Vector of 4 comparison results.
typedef uint32x4_t	AKSIMD_V4FCOND
	Vector of 4 comparison results.
typedef float32x2x2_t	AKSIMD_V2F32X2
typedef float32x4x2_t	AKSIMD_V4F32X2
typedef float32x4x4_t	AKSIMD_V4F32X4
AKSIMD shuffling

#define	AKSIMD_COMBINE_V2F32(a, b) vcombine_f32( a, b )
#define	AKSIMD_SHUFFLE(fp3, fp2, fp1, fp0) (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0)))
#define	AKSIMD_SHUFFLE_V4F32(a, b, zyxw) _AKSIMD_LOCAL::SHUFFLE_V4F32< zyxw >( a, b )
#define	AKSIMD_SHUFFLE_BADC(__a__) vrev64q_f32( __a__ )
	Swaps the 2 lower floats with each other and the 2 higher floats with each other.
#define	AKSIMD_SHUFFLE_CDAB(__a__) vcombine_f32( vget_high_f32(__a__), vget_low_f32(__a__) )
	Swap the 2 lower floats with the 2 higher floats.
#define	AKSIMD_DUP_ODD(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(3,3,1,1))
	Duplicates the odd items into the even items (d c b a -> d d b b ).
#define	AKSIMD_DUP_EVEN(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(2,2,0,0))
	Duplicates the even items into the odd items (d c b a -> c c a a ).
AKSIMD_V4F32	AKSIMD_MOVEHL_V4F32 (const AKSIMD_V4F32 abcd, const AKSIMD_V4F32 xyzw)
AKSIMD_V4F32	AKSIMD_MOVELH_V4F32 (const AKSIMD_V4F32 &xyzw, const AKSIMD_V4F32 &abcd)
AKSIMD arithmetic

#define	AKSIMD_SUB_V4F32(__a__, __b__) vsubq_f32( (__a__), (__b__) )
#define	AKSIMD_SUB_V2F32(__a__, __b__) vsub_f32( (__a__), (__b__) )
#define	AKSIMD_SUB_SS_V4F32(__a__, __b__) vsubq_f32( (__a__), vsetq_lane_f32( AKSIMD_GETELEMENT_V4F32( (__b__), 0 ), AKSIMD_SETZERO_V4F32(), 0 ) );
#define	AKSIMD_ADD_V4F32(__a__, __b__) vaddq_f32( (__a__), (__b__) )
#define	AKSIMD_ADD_V2F32(__a__, __b__) vadd_f32( (__a__), (__b__) )
#define	AKSIMD_ADD_V4I32(__a__, __b__) vaddq_s32( (__a__), (__b__) )
	Adds the four integers of a and b.
#define	AKSIMD_COMP_V4F32(__a__, __b__) vceqq_f32( (__a__), (__b__) )
#define	AKSIMD_COMP_V2F32(__a__, __b__) vceq_f32( (__a__), (__b__) )
#define	AKSIMD_ADD_SS_V4F32(__a__, __b__) vaddq_f32( (__a__), vsetq_lane_f32( AKSIMD_GETELEMENT_V4F32( (__b__), 0 ), AKSIMD_SETZERO_V4F32(), 0 ) )
#define	AKSIMD_MUL_V4F32(__a__, __b__) vmulq_f32( (__a__), (__b__) )
#define	AKSIMD_MUL_V4F32_SCALAR(__a__, __b__) vmulq_n_f32( (__a__), (__b__) )
#define	AKSIMD_MUL_V2F32(__a__, __b__) vmul_f32( (__a__), (__b__) )
#define	AKSIMD_MUL_V2F32_SCALAR(__a__, __b__) vmul_n_f32( (__a__), (__b__) )
#define	AKSIMD_MUL_SS_V4F32(__a__, __b__) vmulq_f32( (__a__), vsetq_lane_f32( AKSIMD_GETELEMENT_V4F32( (__b__), 0 ), AKSIMD_SETZERO_V4F32(), 0 ) )
#define	AKSIMD_MADD_V4F32(__a__, __b__, __c__) AKSIMD_ADD_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) )
	Vector multiply-add operation.
#define	AKSIMD_MSUB_V4F32(__a__, __b__, __c__) AKSIMD_SUB_V4F32( AKSIMD_MUL_V4F32( (__a__), (__b__) ), (__c__) )
#define	AKSIMD_MADD_V2F32(__a__, __b__, __c__) AKSIMD_ADD_V2F32( AKSIMD_MUL_V2F32( (__a__), (__b__) ), (__c__) )
#define	AKSIMD_MSUB_V2F32(__a__, __b__, __c__) AKSIMD_SUB_V2F32( AKSIMD_MUL_V2F32( (__a__), (__b__) ), (__c__) )
#define	AKSIMD_MADD_V4F32_INST(__a__, __b__, __c__) vmlaq_f32( (__c__), (__a__), (__b__) )
#define	AKSIMD_MADD_V2F32_INST(__a__, __b__, __c__) vmla_f32( (__c__), (__a__), (__b__) )
#define	AKSIMD_MADD_V4F32_SCALAR(__a__, __b__, __c__) vmlaq_n_f32( (__c__), (__a__), (__b__) )
#define	AKSIMD_MADD_V2F32_SCALAR(__a__, __b__, __c__) vmla_n_f32( (__c__), (__a__), (__b__) )
#define	AKSIMD_MIN_V4F32(__a__, __b__) vminq_f32( (__a__), (__b__) )
#define	AKSIMD_MIN_V2F32(__a__, __b__) vmin_f32( (__a__), (__b__) )
#define	AKSIMD_MAX_V4F32(__a__, __b__) vmaxq_f32( (__a__), (__b__) )
#define	AKSIMD_MAX_V2F32(__a__, __b__) vmax_f32( (__a__), (__b__) )
#define	AKSIMD_ABS_V4F32(__a__) vabsq_f32((__a__))
	Returns absolute value.
#define	AKSIMD_NEG_V2F32(__a__) vneg_f32( (__a__) )
	Changes the sign.
#define	AKSIMD_NEG_V4F32(__a__) vnegq_f32( (__a__) )
#define	AKSIMD_SQRT_V4F32(__vec__) vrecpeq_f32( vrsqrteq_f32( __vec__ ) )
	Square root (4 floats).
#define	AKSIMD_SQRT_V2F32(__vec__) vrecpe_f32( vrsqrte_f32( __vec__ ) )
	Square root (2 floats).
AkForceInline AKSIMD_V4F32	AKSIMD_DIV_V4F32 (AKSIMD_V4F32 a, AKSIMD_V4F32 b)
	Rough estimation of division.
AkForceInline AKSIMD_V4F32	AKSIMD_MADD_SS_V4F32 (const AKSIMD_V4F32 &__a__, const AKSIMD_V4F32 &__b__, const AKSIMD_V4F32 &__c__)
	Vector multiply-add operation.
static AkForceInline void	AKSIMD_HORIZONTALADD (AKSIMD_V4F32 &vVec)
static AkForceInline AKSIMD_V4F32	AKSIMD_COMPLEXMUL (AKSIMD_V4F32 vCIn1, AKSIMD_V4F32 vCIn2)
	Cross-platform SIMD multiplication of 2 complex data elements with interleaved real and imaginary parts.
AKSIMD packing / unpacking

#define	AKSIMD_UNPACKLO_VECTOR8I16(__a__, __b__) vreinterpretq_s32_s16( vzipq_s16( vreinterpretq_s16_s32(__a__), vreinterpretq_s16_s32(__b__) ).val[0] )
#define	AKSIMD_UNPACKHI_VECTOR8I16(__a__, __b__) vreinterpretq_s32_s16( vzipq_s16( vreinterpretq_s16_s32(__a__), vreinterpretq_s16_s32(__b__) ).val[1] )
#define	AKSIMD_HILO_V2F32(in_vec1, in_vec2) vreinterpret_f32_u32( vext_u32( vreinterpret_u32_f32( in_vec1 ), vreinterpret_u32_f32( in_vec2 ), 1 ) )
#define	AKSIMD_TRANSPOSE_V2F32(in_vec1, in_vec2) vtrn_f32( in_vec1, in_vec2 )
#define	AKSIMD_TRANSPOSE_V4F32(in_vec1, in_vec2) vtrnq_f32( in_vec1, in_vec2 )
#define	AKSIMD_SWAP_V2F32(in_vec) vrev64_f32( in_vec )
	V1 = {a,b} => VR = {b,a}.
AkForceInline AKSIMD_V4F32	AKSIMD_UNPACKLO_V4F32 (const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
AkForceInline AKSIMD_V4F32	AKSIMD_UNPACKHI_V4F32 (const AKSIMD_V4F32 &in_vec1, const AKSIMD_V4F32 &in_vec2)
AkForceInline AKSIMD_V4I32	AKSIMD_PACKS_V4I32 (const AKSIMD_V4I32 &in_vec1, const AKSIMD_V4I32 &in_vec2)

Detailed Description

AKSIMD - arm_neon implementation

Definition in file AkSimd.h.

Cette page a-t-elle été utile ?

Besoin d'aide ?

Des questions ? Des problèmes ? Besoin de plus d'informations ? Contactez-nous, nous pouvons vous aider !

Visitez notre page d'Aide

Décrivez-nous votre projet. Nous sommes là pour vous aider.

Enregistrez votre projet et nous vous aiderons à démarrer sans aucune obligation !

Partir du bon pied avec Wwise

Wwise SDK 2016.2.6

include/AK/SoundEngine/Platforms/arm_neon/AkSimd.h File Reference

Defines

Typedefs

AKSIMD shuffling

AKSIMD arithmetic

AKSIMD packing / unpacking

Detailed Description

Cette page a-t-elle été utile ?

Besoin d'aide ?

Décrivez-nous votre projet. Nous sommes là pour vous aider.