버전

menu_open
Wwise SDK 2024.1.1
AkSimdMath.h
이 파일의 문서화 페이지로 가기
1 /***********************************************************************
2  The content of this file includes source code for the sound engine
3  portion of the AUDIOKINETIC Wwise Technology and constitutes "Level
4  Two Source Code" as defined in the Source Code Addendum attached
5  with this file. Any use of the Level Two Source Code shall be
6  subject to the terms and conditions outlined in the Source Code
7  Addendum and the End User License Agreement for Wwise(R).
8 
9  Copyright (c) 2024 Audiokinetic Inc.
10  ***********************************************************************/
11 
12 //////////////////////////////////////////////////////////////////////
13 //
14 // AkSimdMath.h
15 //
16 // Library of static functions for math computations with SIMD in mind.
17 //
18 //////////////////////////////////////////////////////////////////////
19 #ifndef _AKSIMDMATH_H_
20 #define _AKSIMDMATH_H_
21 
24 #include <AkMath.h>
25 
26 namespace AkMath
27 {
28  //Take 4 vectors <x,y,z> and return <x,x,x,x>, <y,y,y,y> and <z,z,z,z>
29  AkForceInline void PermuteVectors3(const AKSIMD_V4F32& v0, const AKSIMD_V4F32& v1, const AKSIMD_V4F32& v2, const AKSIMD_V4F32& v3,
30  AKSIMD_V4F32& out_xxxx, AKSIMD_V4F32& out_yyyy, AKSIMD_V4F32& out_zzzz)
31  {
32  AKSIMD_V4F32 xyxy0 = AKSIMD_SHUFFLE_V4F32(v0, v1, AKSIMD_SHUFFLE(1, 0, 1, 0));
33  AKSIMD_V4F32 xyxy1 = AKSIMD_SHUFFLE_V4F32(v2, v3, AKSIMD_SHUFFLE(1, 0, 1, 0));
34  out_xxxx = AKSIMD_SHUFFLE_V4F32(xyxy0, xyxy1, AKSIMD_SHUFFLE(2, 0, 2, 0));
35  out_yyyy = AKSIMD_SHUFFLE_V4F32(xyxy0, xyxy1, AKSIMD_SHUFFLE(3, 1, 3, 1));
36 
37  AKSIMD_V4F32 zwzw0 = AKSIMD_SHUFFLE_V4F32(v0, v1, AKSIMD_SHUFFLE(3, 2, 3, 2));
38  AKSIMD_V4F32 zwzw1 = AKSIMD_SHUFFLE_V4F32(v2, v3, AKSIMD_SHUFFLE(3, 2, 3, 2));
39  out_zzzz = AKSIMD_SHUFFLE_V4F32(zwzw0, zwzw1, AKSIMD_SHUFFLE(2, 0, 2, 0));
40  }
41 
42  //Take 3 vectors <x3,x2,x1,x0>, <y,y,y,y> and <z,z,z,z> and return 4 vectors <x,y,z,z>
43  AkForceInline void UnpermuteVectors3(const AKSIMD_V4F32& xxxx, const AKSIMD_V4F32& yyyy, const AKSIMD_V4F32& zzzz,
44  AKSIMD_V4F32& out_v0, AKSIMD_V4F32& out_v1, AKSIMD_V4F32& out_v2, AKSIMD_V4F32& out_v3)
45  {
46  /*__m128 _mm_shuffle_ps(__m128 lo, __m128 hi, _MM_SHUFFLE(hi3, hi2, lo1, lo0))
47  Interleave inputs into low 2 floats and high 2 floats of output.Basically
48  out[0] = lo[lo0];
49  out[1] = lo[lo1];
50  out[2] = hi[hi2];
51  out[3] = hi[hi3];
52  */
53 
54  AKSIMD_V4F32 x0x1y0y1 = AKSIMD_SHUFFLE_V4F32(xxxx, yyyy, AKSIMD_SHUFFLE(1, 0, 1, 0));
55  AKSIMD_V4F32 z0z1z0z1 = AKSIMD_SHUFFLE_V4F32(zzzz, zzzz, AKSIMD_SHUFFLE(1, 0, 1, 0));
56 
57  out_v0 = AKSIMD_SHUFFLE_V4F32(x0x1y0y1, z0z1z0z1, AKSIMD_SHUFFLE(2, 0, 2, 0));
58  out_v1 = AKSIMD_SHUFFLE_V4F32(x0x1y0y1, z0z1z0z1, AKSIMD_SHUFFLE(3, 1, 3, 1));
59 
60  AKSIMD_V4F32 x2x3y2y3 = AKSIMD_SHUFFLE_V4F32(xxxx, yyyy, AKSIMD_SHUFFLE(3, 2, 3, 2));
61  AKSIMD_V4F32 z2z3z2z3 = AKSIMD_SHUFFLE_V4F32(zzzz, zzzz, AKSIMD_SHUFFLE(3, 2, 3, 2));
62 
63  out_v2 = AKSIMD_SHUFFLE_V4F32(x2x3y2y3, z2z3z2z3, AKSIMD_SHUFFLE(2, 0, 2, 0));
64  out_v3 = AKSIMD_SHUFFLE_V4F32(x2x3y2y3, z2z3z2z3, AKSIMD_SHUFFLE(3, 1, 3, 1));
65  }
66 
67  //Take 4 vectors <x,y,z,w> and return <x,x,x,x>, <y,y,y,y>, <z,z,z,z> and <w,w,w,w>
68  AkForceInline void PermuteVectors4(const AKSIMD_V4F32& v0, const AKSIMD_V4F32& v1, const AKSIMD_V4F32& v2, const AKSIMD_V4F32& v3,
69  AKSIMD_V4F32& out_xxxx, AKSIMD_V4F32& out_yyyy, AKSIMD_V4F32& out_zzzz, AKSIMD_V4F32& out_wwww)
70  {
71  AKSIMD_V4F32 xyxy0 = AKSIMD_SHUFFLE_V4F32(v0, v1, AKSIMD_SHUFFLE(1, 0, 1, 0));
72  AKSIMD_V4F32 xyxy1 = AKSIMD_SHUFFLE_V4F32(v2, v3, AKSIMD_SHUFFLE(1, 0, 1, 0));
73  out_xxxx = AKSIMD_SHUFFLE_V4F32(xyxy0, xyxy1, AKSIMD_SHUFFLE(2, 0, 2, 0));
74  out_yyyy = AKSIMD_SHUFFLE_V4F32(xyxy0, xyxy1, AKSIMD_SHUFFLE(3, 1, 3, 1));
75 
76  AKSIMD_V4F32 zwzw0 = AKSIMD_SHUFFLE_V4F32(v0, v1, AKSIMD_SHUFFLE(3, 2, 3, 2));
77  AKSIMD_V4F32 zwzw1 = AKSIMD_SHUFFLE_V4F32(v2, v3, AKSIMD_SHUFFLE(3, 2, 3, 2));
78  out_zzzz = AKSIMD_SHUFFLE_V4F32(zwzw0, zwzw1, AKSIMD_SHUFFLE(2, 0, 2, 0));
79  out_wwww = AKSIMD_SHUFFLE_V4F32(zwzw0, zwzw1, AKSIMD_SHUFFLE(3, 1, 3, 1));
80  }
81 
82  // 3-element dot product of 4 vectors.
84  const AKSIMD_V4F32& v1_x, const AKSIMD_V4F32& v1_y, const AKSIMD_V4F32& v1_z)
85  {
86  return AKSIMD_ADD_V4F32(AKSIMD_ADD_V4F32(AKSIMD_MUL_V4F32(v0_x, v1_x), AKSIMD_MUL_V4F32(v0_y, v1_y)), AKSIMD_MUL_V4F32(v0_z, v1_z));
87  }
88 
89  // 3-element dot product of 1 common vector with 4 vectors
90  AkForceInline AKSIMD_V4F32 DotPoduct3_1x4(const AKSIMD_V4F32& v0_xyz, const AKSIMD_V4F32& v1_x, const AKSIMD_V4F32& v1_y, const AKSIMD_V4F32& v1_z)
91  {
92  const AKSIMD_V4F32 v0_x = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(0, 0, 0, 0));
93  const AKSIMD_V4F32 v0_y = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(1, 1, 1, 1));
94  const AKSIMD_V4F32 v0_z = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(2, 2, 2, 2));
95  return DotPoduct3_4x4(v0_x, v0_y, v0_z, v1_x, v1_y, v1_z);
96  }
97 
98  // 4-element dot product of 4 vectors.
99  AkForceInline AKSIMD_V4F32 DotPoduct4_4x4(const AKSIMD_V4F32& v0_x, const AKSIMD_V4F32& v0_y, const AKSIMD_V4F32& v0_z, const AKSIMD_V4F32& v0_w,
100  const AKSIMD_V4F32& v1_x, const AKSIMD_V4F32& v1_y, const AKSIMD_V4F32& v1_z, const AKSIMD_V4F32& v1_w)
101  {
102  return AKSIMD_ADD_V4F32(
104  AKSIMD_MUL_V4F32(v0_x, v1_x),
105  AKSIMD_MUL_V4F32(v0_y, v1_y)),
107  AKSIMD_MUL_V4F32(v0_z, v1_z),
108  AKSIMD_MUL_V4F32(v0_w, v1_w)));
109  }
110 
111  // 4-element dot product of 1 common vector with 4 vectors
112  AkForceInline AKSIMD_V4F32 DotPoduct4_1x4(const AKSIMD_V4F32& v0_xyz, const AKSIMD_V4F32& v1_x, const AKSIMD_V4F32& v1_y, const AKSIMD_V4F32& v1_z, const AKSIMD_V4F32& v1_w)
113  {
114  const AKSIMD_V4F32 v0_x = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(0, 0, 0, 0));
115  const AKSIMD_V4F32 v0_y = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(1, 1, 1, 1));
116  const AKSIMD_V4F32 v0_z = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(2, 2, 2, 2));
117  const AKSIMD_V4F32 v0_w = AKSIMD_SHUFFLE_V4F32(v0_xyz, v0_xyz, AKSIMD_SHUFFLE(2, 2, 2, 2));
118  return DotPoduct4_4x4(v0_x, v0_y, v0_z, v0_w, v1_x, v1_y, v1_z, v1_w);
119  }
120 
121  // 3-element cross product of 4 vectors, returned as XXXX, YYYY, ZZZZ
123  const AKSIMD_V4F32& u_x, const AKSIMD_V4F32& u_y, const AKSIMD_V4F32& u_z,
124  const AKSIMD_V4F32& v_x, const AKSIMD_V4F32& v_y, const AKSIMD_V4F32& v_z,
125  AKSIMD_V4F32& uXv_x, AKSIMD_V4F32& uXv_y, AKSIMD_V4F32& uXv_z
126  )
127  {
128  uXv_x = AKSIMD_SUB_V4F32(AKSIMD_MUL_V4F32(u_y, v_z), AKSIMD_MUL_V4F32(u_z, v_y));
129  uXv_y = AKSIMD_SUB_V4F32(AKSIMD_MUL_V4F32(u_z, v_x), AKSIMD_MUL_V4F32(u_x, v_z));
130  uXv_z = AKSIMD_SUB_V4F32(AKSIMD_MUL_V4F32(u_x, v_y), AKSIMD_MUL_V4F32(u_y, v_x));
131  }
132 
133  // Trig functions approximation (based on the Fast versions found in AkMath.h)
135  {
136  const AKSIMD_V4F32 B = AKSIMD_SET_V4F32(4 / PI);
137  const AKSIMD_V4F32 C = AKSIMD_SET_V4F32(-4 / (PI * PI));
138  const AKSIMD_V4F32 P = AKSIMD_SET_V4F32(0.225f);
139 
140  //float y = B * x + C * x * fabs(x); //float y = X*(B+C*fabs(x));
141 
143  y = AKSIMD_MADD_V4F32(y, C, B);
144  y = AKSIMD_MUL_V4F32(y, x);
145 
146  // return P * (y * fabs(y) - y) + y;
147  AKSIMD_V4F32 sine = AKSIMD_ABS_V4F32(y);
148  sine = AKSIMD_MSUB_V4F32(y, sine, y);
149  sine = AKSIMD_MADD_V4F32(sine, P, y);
150  return sine;
151  }
152 
154  {
155  //Compute the offset needed for the cosinus. If you compare with FastCos, the constants have been combined.
156  const AKSIMD_V4F32 offsetNoWrap = AKSIMD_SET_V4F32(PI / 2); // cos = sin(x+pi/2)
157  const AKSIMD_V4F32 offsetWrap = AKSIMD_SET_V4F32(PI / 2 - 2 * PI); // Wrap: cos(x) = cos(x - 2 pi)
158  const AKSIMD_V4F32 vHalfPI = AKSIMD_SET_V4F32(PI / 2);
159 
160  // (cond1 >= cond2) ? a : b
161  AKSIMD_V4F32 offset = AKSIMD_SEL_GTEZ_V4F32(AKSIMD_SUB_V4F32(x, vHalfPI), offsetWrap, offsetNoWrap);
162  return AKSIMD_SIN_V4F32(AKSIMD_ADD_V4F32(x, offset));
163  }
164 
166  {
167  const AKSIMD_V4F32 vNeg = AKSIMD_SET_V4F32(-1.0f);
168  const AKSIMD_V4F32 vOne = AKSIMD_SET_V4F32(1.0f);
169  const AKSIMD_V4F32 vZero = AKSIMD_SET_V4F32(0.0f);
170  const AKSIMD_V4F32 vK = AKSIMD_SET_V4F32(0.28f);
171  const AKSIMD_V4F32 vKRepro = AKSIMD_SET_V4F32(1.f / 0.28f);
172  const AKSIMD_V4F32 vHalfPI = AKSIMD_SET_V4F32(PI / 2);
173  const AKSIMD_V4F32 vPI = AKSIMD_SET_V4F32(PI);
174  const AKSIMD_V4F32 vEpsilon = AKSIMD_SET_V4F32(1e-20f);
175 
176  //Ensure x is not zero a == 0 ? b : c.
177  x = AKSIMD_VSEL_V4F32(x, vEpsilon, AKSIMD_EQ_V4F32(x, vZero));
178 
179  AKSIMD_V4F32 z = AKSIMD_DIV_V4F32(y, x);
180  AKSIMD_V4F32 absz = AKSIMD_ABS_V4F32(z);
181  AKSIMD_V4COND zcond = AKSIMD_GTEQ_V4F32(vOne, absz);
182 
183  //The approximation is done in 2 segments of the form: offset + z/a*(z*z + b);
184 
185  //if ( fabsf( z ) < 1.0f ) then use .28 for the a coef
186  AKSIMD_V4F32 a = AKSIMD_VSEL_V4F32(vNeg, vK, zcond);
187 
188  //if ( fabsf( z ) < 1.0f ) then use 1 for the b factor, else use 0.28
189  AKSIMD_V4F32 b = AKSIMD_VSEL_V4F32(vK, vKRepro, zcond);
190 
191  AKSIMD_V4F32 atan = AKSIMD_MADD_V4F32(z, z, b);
192  atan = AKSIMD_MUL_V4F32(atan, a);
193  atan = AKSIMD_DIV_V4F32(z, atan);
194 
195  //Adjust for quadrant
196  // zcond x<0 y<0 offset
197  // 1 0 0 0
198  // 1 0 1 0
199  // 1 1 0 +PI
200  // 1 1 1 -PI
201  // 0 0 0 +PI/2
202  // 0 0 1 -PI/2
203  // 0 1 0 +PI/2
204  // 0 1 1 -PI/2
205 
206  AKSIMD_V4F32 offsetByX = AKSIMD_SEL_GTEZ_V4F32(x, vZero, vPI);
207  AKSIMD_V4F32 offset = AKSIMD_VSEL_V4F32(vHalfPI, offsetByX, zcond);
208  AKSIMD_V4F32 sign = AKSIMD_SEL_GTEZ_V4F32(y, vOne, vNeg);
209 
210  //Apply computed offset.
211  atan = AKSIMD_MADD_V4F32(offset, sign, atan);
212  return atan;
213  }
214 
215  //Accepts any positive x. Compare with FastSqrt() which accepts only between ]0,1]
217  {
219  return AKSIMD_GETELEMENT_V4F32(y, 0);
220  }
221 
222  //Compute 1/sqrt(x)
224  {
226  return AKSIMD_GETELEMENT_V4F32(y, 0);
227  }
228 
230  {
232  return AKSIMD_GETELEMENT_V4F32(y, 0);
233  }
234 }
235 
236 #endif //_AKSIMDMATH_H_
#define AKSIMD_SHUFFLE(fp3, fp2, fp1, fp0)
Definition: AkSimd.h:511
AkForceInline AKSIMD_V4F32 DotPoduct3_1x4(const AKSIMD_V4F32 &v0_xyz, const AKSIMD_V4F32 &v1_x, const AKSIMD_V4F32 &v1_y, const AKSIMD_V4F32 &v1_z)
Definition: AkSimdMath.h:90
AkForceInline AKSIMD_V4F32 DotPoduct4_1x4(const AKSIMD_V4F32 &v0_xyz, const AKSIMD_V4F32 &v1_x, const AKSIMD_V4F32 &v1_y, const AKSIMD_V4F32 &v1_z, const AKSIMD_V4F32 &v1_w)
Definition: AkSimdMath.h:112
AkForceInline AKSIMD_V4F32 DotPoduct4_4x4(const AKSIMD_V4F32 &v0_x, const AKSIMD_V4F32 &v0_y, const AKSIMD_V4F32 &v0_z, const AKSIMD_V4F32 &v0_w, const AKSIMD_V4F32 &v1_x, const AKSIMD_V4F32 &v1_y, const AKSIMD_V4F32 &v1_z, const AKSIMD_V4F32 &v1_w)
Definition: AkSimdMath.h:99
#define AKSIMD_VSEL_V4F32(__a__, __b__, __c__)
Return a when control mask is 0, return b when control mask is non zero, control mask is in c and usu...
Definition: AkSimd.h:904
#define AKSIMD_GTEQ_V4F32(__a__, __b__)
Compare each float element and return control mask.
Definition: AkSimd.h:883
#define AKSIMD_RSQRT_V4F32(__a__)
Vector reciprocal square root approximation 1/sqrt(a), or equivalently, sqrt(1/a)
Definition: AkSimd.h:703
AkForceInline AKSIMD_V4F32 AKSIMD_COS_V4F32(const AKSIMD_V4F32 x)
Definition: AkSimdMath.h:153
float32x4_t AKSIMD_V4F32
Vector of 4 32-bit floats
Definition: AkSimdTypes.h:62
AkForceInline void UnpermuteVectors3(const AKSIMD_V4F32 &xxxx, const AKSIMD_V4F32 &yyyy, const AKSIMD_V4F32 &zzzz, AKSIMD_V4F32 &out_v0, AKSIMD_V4F32 &out_v1, AKSIMD_V4F32 &out_v2, AKSIMD_V4F32 &out_v3)
Definition: AkSimdMath.h:43
#define AKSIMD_MADD_V4F32(__a__, __b__, __c__)
Vector multiply-add and multiply-subtract operations (Aarch64 uses the fused-variants directly where ...
Definition: AkSimd.h:659
#define AKSIMD_SHUFFLE_V4F32(a, b, zyxw)
Definition: AkSimd.h:528
AkForceInline AKSIMD_V4F32 DotPoduct3_4x4(const AKSIMD_V4F32 &v0_x, const AKSIMD_V4F32 &v0_y, const AKSIMD_V4F32 &v0_z, const AKSIMD_V4F32 &v1_x, const AKSIMD_V4F32 &v1_y, const AKSIMD_V4F32 &v1_z)
Definition: AkSimdMath.h:83
float AkReal32
32-bit floating point
AkForceInline void CrossProduct3_4x4(const AKSIMD_V4F32 &u_x, const AKSIMD_V4F32 &u_y, const AKSIMD_V4F32 &u_z, const AKSIMD_V4F32 &v_x, const AKSIMD_V4F32 &v_y, const AKSIMD_V4F32 &v_z, AKSIMD_V4F32 &uXv_x, AKSIMD_V4F32 &uXv_y, AKSIMD_V4F32 &uXv_z)
Definition: AkSimdMath.h:122
#define AKSIMD_EQ_V4F32(__a__, __b__)
Compare each float element and return control mask.
Definition: AkSimd.h:898
AkForceInline AkReal32 FastRSqrt(AkReal32 x)
Definition: AkSimdMath.h:223
#define AKSIMD_GETELEMENT_V4F32(__vName, __num__)
Get the element at index num in vector __vName
Definition: AkSimd.h:37
#define AKSIMD_SUB_V4F32(__a__, __b__)
Definition: AkSimd.h:577
AkForceInline AKSIMD_V4F32 AKSIMD_ATAN2_V4F32(AKSIMD_V4F32 y, AKSIMD_V4F32 x)
Definition: AkSimdMath.h:165
#define AKSIMD_MSUB_V4F32(__a__, __b__, __c__)
Definition: AkSimd.h:667
#define AKSIMD_SEL_GTEZ_V4F32(__a__, __b__, __c__)
Definition: AkSimd.h:910
#define AKSIMD_SET_V4F32(__scalar__)
Definition: AkSimd.h:70
AkForceInline void PermuteVectors4(const AKSIMD_V4F32 &v0, const AKSIMD_V4F32 &v1, const AKSIMD_V4F32 &v2, const AKSIMD_V4F32 &v3, AKSIMD_V4F32 &out_xxxx, AKSIMD_V4F32 &out_yyyy, AKSIMD_V4F32 &out_zzzz, AKSIMD_V4F32 &out_wwww)
Definition: AkSimdMath.h:68
AkForceInline void PermuteVectors3(const AKSIMD_V4F32 &v0, const AKSIMD_V4F32 &v1, const AKSIMD_V4F32 &v2, const AKSIMD_V4F32 &v3, AKSIMD_V4F32 &out_xxxx, AKSIMD_V4F32 &out_yyyy, AKSIMD_V4F32 &out_zzzz)
Definition: AkSimdMath.h:29
AkForceInline AKSIMD_V4F32 AKSIMD_DIV_V4F32(AKSIMD_V4F32 a, AKSIMD_V4F32 b)
Rough estimation of division
Definition: AkSimd.h:629
AkForceInline AkReal32 FastSqrtLarge(AkReal32 x)
Definition: AkSimdMath.h:216
#define AKSIMD_RECIP_V4F32(__a__)
Reciprocal of x (1/x)
Definition: AkSimd.h:709
#define AKSIMD_ABS_V4F32(__a__)
Returns absolute value
Definition: AkSimd.h:693
#define AKSIMD_ADD_V4F32(__a__, __b__)
Definition: AkSimd.h:591
uint32x4_t AKSIMD_V4COND
Vector of 4 comparison results
Definition: AkSimdTypes.h:64
AkForceInline AKSIMD_V4F32 AKSIMD_SIN_V4F32(const AKSIMD_V4F32 x)
Definition: AkSimdMath.h:134
AkForceInline AkReal32 FastRcp(AkReal32 x)
Definition: AkSimdMath.h:229
#define AkForceInline
Definition: AkTypes.h:63
#define AKSIMD_SQRT_V4F32(__vec__)
Square root (4 floats)
Definition: AkSimd.h:700
#define AKSIMD_MUL_V4F32(__a__, __b__)
Definition: AkSimd.h:622

이 페이지가 도움이 되었나요?

지원이 필요하신가요?

질문이 있으신가요? 문제를 겪고 계신가요? 더 많은 정보가 필요하신가요? 저희에게 문의해주시면 도와드리겠습니다!

지원 페이지를 방문해 주세요

작업하는 프로젝트에 대해 알려주세요. 언제든지 도와드릴 준비가 되어 있습니다.

프로젝트를 등록하세요. 아무런 조건이나 의무 사항 없이 빠른 시작을 도와드리겠습니다.

Wwise를 시작해 보세요