Wwise SDK 2023.1.5
AkSimd.h
/*******************************************************************************
The content of this file includes portions of the AUDIOKINETIC Wwise Technology
released in source code form as part of the SDK installer package.

Commercial License Usage

Licensees holding valid commercial licenses to the AUDIOKINETIC Wwise Technology
may use this file in accordance with the end user license agreement provided
with the software or, alternatively, in accordance with the terms contained in a
written agreement between you and Audiokinetic Inc.

Apache License Usage

Alternatively, this file may be used under the Apache License, Version 2.0 (the
"Apache License"); you may not use this file except in compliance with the
Apache License. You may obtain a copy of the Apache License at
http://www.apache.org/licenses/LICENSE-2.0.

Unless required by applicable law or agreed to in writing, software distributed
under the Apache License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
OR CONDITIONS OF ANY KIND, either express or implied. See the Apache License for
the specific language governing permissions and limitations under the License.

 Copyright (c) 2024 Audiokinetic Inc.
*******************************************************************************/

// AkSimd.h

/// \file
/// AKSIMD - SSE implementation

#pragma once

#include <AK/SoundEngine/Common/AkTypes.h>
#include <AK/SoundEngine/Common/AkSimdTypes.h>

#include <xmmintrin.h>
#include <smmintrin.h>
#include <emmintrin.h>
#if defined(__FMA__) || defined(__AVX2__)
#include <immintrin.h>
#endif

////////////////////////////////////////////////////////////////////////
/// @name Platform specific memory size alignment for allocation purposes
//@{
#define AKSIMD_ALIGNSIZE( __Size__ ) (((__Size__) + 15) & ~15)
//@}
////////////////////////////////////////////////////////////////////////

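// Illustrative sketch (not part of the original header; AkSimdExample_* helpers
// are hypothetical): AKSIMD_ALIGNSIZE rounds a byte count up to the next
// multiple of 16, e.g. 20 -> 32 and 16 -> 16, which is useful when sizing
// buffers that will be accessed with 16-byte SIMD loads and stores.
static AkForceInline AkUInt32 AkSimdExample_AlignedSize( AkUInt32 in_uBytes )
{
	return AKSIMD_ALIGNSIZE( in_uBytes ); // (in_uBytes + 15) & ~15
}
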
////////////////////////////////////////////////////////////////////////
/// @name AKSIMD loading / setting
//@{

/// Loads four single-precision floating-point values from unaligned
/// memory (see _mm_loadu_ps)
#define AKSIMD_LOAD_V4F32( __addr__ ) _mm_loadu_ps( (AkReal32*)(__addr__) )

/// Loads four single-precision floating-point values from unaligned
/// memory (see _mm_loadu_ps)
#define AKSIMD_LOADU_V4F32( __addr__ ) _mm_loadu_ps( (__addr__) )

/// Loads one single-precision, floating-point value, copying it into
/// all four words (see _mm_load1_ps, _mm_load_ps1)
#define AKSIMD_LOAD1_V4F32( __scalar__ ) _mm_load1_ps( &(__scalar__) )

/// Sets the four single-precision, floating-point values to __scalar__ (see
/// _mm_set1_ps, _mm_set_ps1)
#define AKSIMD_SET_V4F32( __scalar__ ) _mm_set_ps1( (__scalar__) )

/// Sets the two double-precision, floating-point values to _b and _a,
/// reinterpreted as a vector of single-precision floats
#define AKSIMD_SETV_V2F64( _b, _a ) _mm_castpd_ps(_mm_set_pd( (_b), (_a) ))

/// Populates the full vector with the 4 floating point elements provided
#define AKSIMD_SETV_V4F32( _d, _c, _b, _a ) _mm_set_ps( (_d), (_c), (_b), (_a) )

/// Populates the full vector with the mask[3:0], setting each to 0 or ~0
static AkForceInline AKSIMD_V4COND AKSIMD_SETMASK_V4COND( AkUInt32 x )
{
	__m128i temp = _mm_set_epi32(8, 4, 2, 1);
	__m128i xvec = _mm_set1_epi32(x);
	__m128i xand = _mm_and_si128(xvec, temp);
	return _mm_castsi128_ps(_mm_cmpeq_epi32(temp, xand));
}

/// Sets the four single-precision, floating-point values to zero (see
/// _mm_setzero_ps)
#define AKSIMD_SETZERO_V4F32() _mm_setzero_ps()

/// Loads a single-precision, floating-point value into the low word
/// and clears the upper three words.
/// r0 := *p; r1 := 0.0 ; r2 := 0.0 ; r3 := 0.0 (see _mm_load_ss)
#define AKSIMD_LOAD_SS_V4F32( __addr__ ) _mm_load_ss( (__addr__) )

//@}
////////////////////////////////////////////////////////////////////////
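
// Illustrative sketch (not part of the original header; hypothetical helper):
// AKSIMD_SETMASK_V4COND expands the low 4 bits of an integer into per-lane
// masks. With in_uMask4 == 0b0101, lanes 0 and 2 become ~0 and lanes 1 and 3
// become 0, so _mm_movemask_ps recovers the original low 4 bits.
static AkForceInline int AkSimdExample_RoundTripMask( AkUInt32 in_uMask4 )
{
	AKSIMD_V4COND vMask = AKSIMD_SETMASK_V4COND( in_uMask4 );
	return _mm_movemask_ps( vMask ); // == (int)(in_uMask4 & 0xF)
}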


////////////////////////////////////////////////////////////////////////
/// @name AKSIMD storing
//@{

/// Stores four single-precision, floating-point values. The address
/// does not need to be 16-byte aligned (see _mm_storeu_ps).
#define AKSIMD_STORE_V4F32( __addr__, __vec__ ) _mm_storeu_ps( (AkReal32*)(__addr__), (__vec__) )

/// Stores four single-precision, floating-point values. The address
/// does not need to be 16-byte aligned (see _mm_storeu_ps).
#define AKSIMD_STOREU_V4F32( __addr__, __vec__ ) _mm_storeu_ps( (AkReal32*)(__addr__), (__vec__) )

/// Stores the lower single-precision, floating-point value.
/// *p := a0 (see _mm_store_ss)
#define AKSIMD_STORE1_V4F32( __addr__, __vec__ ) _mm_store_ss( (AkReal32*)(__addr__), (__vec__) )

/// Stores the lower double-precision, floating-point value.
/// *p := a0 (see _mm_store_sd)
#define AKSIMD_STORE1_V2F64( __addr__, __vec__ ) _mm_store_sd( (AkReal64*)(__addr__), _mm_castps_pd(__vec__) )

//@}
////////////////////////////////////////////////////////////////////////

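// Illustrative sketch (not part of the original header; hypothetical helper):
// a typical load-modify-store pass over a float buffer, four samples at a
// time. _mm_mul_ps is used directly because AKSIMD_MUL_V4F32 is only defined
// further down in this file.
static AkForceInline void AkSimdExample_ApplyGain( AkReal32* io_pSamples, AkUInt32 in_uNumFloats, AkReal32 in_fGain )
{
	AKSIMD_V4F32 vGain = AKSIMD_SET_V4F32( in_fGain );
	for ( AkUInt32 i = 0; i + 4 <= in_uNumFloats; i += 4 )
	{
		AKSIMD_V4F32 v = AKSIMD_LOAD_V4F32( io_pSamples + i );	// unaligned OK
		AKSIMD_STORE_V4F32( io_pSamples + i, _mm_mul_ps( v, vGain ) );
	}
}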

////////////////////////////////////////////////////////////////////////
/// @name AKSIMD shuffling
//@{

// Macro for shuffle parameter for AKSIMD_SHUFFLE_V4F32() (see _MM_SHUFFLE)
#define AKSIMD_SHUFFLE( fp3, fp2, fp1, fp0 ) _MM_SHUFFLE( (fp3), (fp2), (fp1), (fp0) )

/// Selects four specific single-precision, floating-point values from
/// a and b, based on the mask i (see _mm_shuffle_ps)
// Usage: AKSIMD_SHUFFLE_V4F32( vec1, vec2, AKSIMD_SHUFFLE( z, y, x, w ) )
#define AKSIMD_SHUFFLE_V4F32( a, b, i ) _mm_shuffle_ps( a, b, i )

#define AKSIMD_SHUFFLE_V4I32( a, b, i ) _mm_castps_si128(_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), i ))

/// Moves the upper two single-precision, floating-point values of b to
/// the lower two single-precision, floating-point values of the result.
/// The upper two single-precision, floating-point values of a are passed
/// through to the result.
/// r3 := a3; r2 := a2; r1 := b3; r0 := b2 (see _mm_movehl_ps)
#define AKSIMD_MOVEHL_V4F32( a, b ) _mm_movehl_ps( a, b )

/// Moves the lower two single-precision, floating-point values of b to
/// the upper two single-precision, floating-point values of the result.
/// The lower two single-precision, floating-point values of a are passed
/// through to the result.
/// r3 := b1 ; r2 := b0 ; r1 := a1 ; r0 := a0 (see _mm_movelh_ps)
#define AKSIMD_MOVELH_V4F32( a, b ) _mm_movelh_ps( a, b )

/// Swap the 2 lower floats together and the 2 higher floats together.
#define AKSIMD_SHUFFLE_BADC( __a__ ) _mm_shuffle_ps( (__a__), (__a__), _MM_SHUFFLE(2,3,0,1))

/// Swap the 2 lower floats with the 2 higher floats.
#define AKSIMD_SHUFFLE_CDAB( __a__ ) _mm_shuffle_ps( (__a__), (__a__), _MM_SHUFFLE(1,0,3,2))

/// Barrel-shift all floats by one.
#define AKSIMD_SHUFFLE_BCDA( __a__ ) AKSIMD_SHUFFLE_V4F32( (__a__), (__a__), _MM_SHUFFLE(0,3,2,1))

/// Duplicates the odd items into the even items (d c b a -> d d b b )
#define AKSIMD_DUP_ODD(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(3,3,1,1))

/// Duplicates the even items into the odd items (d c b a -> c c a a )
#define AKSIMD_DUP_EVEN(__vv) AKSIMD_SHUFFLE_V4F32(__vv, __vv, AKSIMD_SHUFFLE(2,2,0,0))
//@}
////////////////////////////////////////////////////////////////////////

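// Illustrative sketch (not part of the original header; hypothetical helper):
// reversing lane order with AKSIMD_SHUFFLE_V4F32; (d c b a) becomes (a b c d).
static AkForceInline AKSIMD_V4F32 AkSimdExample_Reverse( AKSIMD_V4F32 in_vec )
{
	return AKSIMD_SHUFFLE_V4F32( in_vec, in_vec, AKSIMD_SHUFFLE(0,1,2,3) );
}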

////////////////////////////////////////////////////////////////////////
/// @name AKSIMD arithmetic
//@{

/// Subtracts the four single-precision, floating-point values of
/// a and b (a - b) (see _mm_sub_ps)
#define AKSIMD_SUB_V4F32( a, b ) _mm_sub_ps( a, b )

/// Subtracts the lower single-precision, floating-point values of a and b.
/// The upper three single-precision, floating-point values are passed through from a.
/// r0 := a0 - b0 ; r1 := a1 ; r2 := a2 ; r3 := a3 (see _mm_sub_ss)
#define AKSIMD_SUB_SS_V4F32( a, b ) _mm_sub_ss( a, b )

/// Adds the four single-precision, floating-point values of
/// a and b (see _mm_add_ps)
#define AKSIMD_ADD_V4F32( a, b ) _mm_add_ps( a, b )

/// Adds the lower single-precision, floating-point values of a and b; the
/// upper three single-precision, floating-point values are passed through from a.
/// r0 := a0 + b0; r1 := a1; r2 := a2; r3 := a3 (see _mm_add_ss)
#define AKSIMD_ADD_SS_V4F32( a, b ) _mm_add_ss( a, b )

/// Multiplies the four single-precision, floating-point values
/// of a and b (see _mm_mul_ps)
#define AKSIMD_MUL_V4F32( a, b ) _mm_mul_ps( a, b )

/// Divides the four single-precision, floating-point values of
/// a by b (see _mm_div_ps)
#define AKSIMD_DIV_V4F32( a, b ) _mm_div_ps( a, b )

/// Multiplies the lower single-precision, floating-point values of
/// a and b; the upper three single-precision, floating-point values
/// are passed through from a.
/// r0 := a0 * b0; r1 := a1; r2 := a2; r3 := a3 (see _mm_mul_ss)
#define AKSIMD_MUL_SS_V4F32( a, b ) _mm_mul_ss( a, b )

/// Vector multiply-add and multiply-subtract operations. (When targeting a
/// platform or architecture with FMA (AVX2 implies FMA), using the FMA
/// intrinsics directly tends to be slightly more desirable.)
#if defined(__FMA__) || defined(__AVX2__)
#define AKSIMD_MADD_V4F32( __a__, __b__, __c__ ) _mm_fmadd_ps( (__a__), (__b__) , (__c__) )
#define AKSIMD_MSUB_V4F32( __a__, __b__, __c__ ) _mm_fmsub_ps( (__a__), (__b__) , (__c__) )
#else
#define AKSIMD_MADD_V4F32( __a__, __b__, __c__ ) _mm_add_ps( _mm_mul_ps( (__a__), (__b__) ), (__c__) )
#define AKSIMD_MSUB_V4F32( __a__, __b__, __c__ ) _mm_sub_ps( _mm_mul_ps( (__a__), (__b__) ), (__c__) )
#endif
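
// Illustrative sketch (not part of the original header; hypothetical helper):
// AKSIMD_MADD_V4F32 computes a*b + c per lane, mapping to a single fused
// instruction when FMA is available and to mul+add otherwise. A typical use is
// accumulating a scaled signal into an existing mix.
static AkForceInline AKSIMD_V4F32 AkSimdExample_MixInto( AKSIMD_V4F32 in_vDry, AKSIMD_V4F32 in_vWet, AKSIMD_V4F32 in_vWetGain )
{
	return AKSIMD_MADD_V4F32( in_vWet, in_vWetGain, in_vDry ); // wet*gain + dry
}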

/// Vector multiply-add operation.
#define AKSIMD_MADD_SS_V4F32( __a__, __b__, __c__ ) _mm_add_ss( _mm_mul_ss( (__a__), (__b__) ), (__c__) )

/// Computes the minima of the four single-precision, floating-point
/// values of a and b (see _mm_min_ps)
#define AKSIMD_MIN_V4F32( a, b ) _mm_min_ps( a, b )

/// Computes the maxima of the four single-precision, floating-point
/// values of a and b (see _mm_max_ps)
#define AKSIMD_MAX_V4F32( a, b ) _mm_max_ps( a, b )

/// Computes the absolute value
#define AKSIMD_ABS_V4F32( a ) _mm_andnot_ps(_mm_set1_ps(-0.f), a)

/// Changes the sign
#define AKSIMD_NEG_V4F32( __a__ ) _mm_xor_ps(_mm_set1_ps(-0.f), __a__)

/// Vector square root approximation (see _mm_sqrt_ps)
#define AKSIMD_SQRT_V4F32( __a__ ) _mm_sqrt_ps( (__a__) )

/// Vector reciprocal square root approximation 1/sqrt(a), or equivalently, sqrt(1/a)
#define AKSIMD_RSQRT_V4F32( __a__ ) _mm_rsqrt_ps( (__a__) )

/// Reciprocal of x (1/x)
#define AKSIMD_RECIP_V4F32(__a__) _mm_rcp_ps(__a__)

/// Binary xor for single-precision floating-point
#define AKSIMD_XOR_V4F32( a, b ) _mm_xor_ps(a,b)

/// Rounds to upper value (approximate ceiling: adds 0.49999 and rounds to nearest)
static AkForceInline AKSIMD_V4F32 AKSIMD_CEIL_V4F32( const AKSIMD_V4F32 & x )
{
	static const AKSIMD_V4F32 vEpsilon = { 0.49999f, 0.49999f, 0.49999f, 0.49999f };
	return _mm_cvtepi32_ps(_mm_cvtps_epi32(_mm_add_ps(x, vEpsilon)));
}

/// Faked in-place vector horizontal add - each element will represent sum of all elements
/// \akwarning
/// Don't expect this to be very efficient.
/// \endakwarning
static AkForceInline AKSIMD_V4F32 AKSIMD_HORIZONTALADD_V4F32( AKSIMD_V4F32 vVec )
{
	__m128 vAb = _mm_shuffle_ps(vVec, vVec, 0xB1);
	__m128 vHaddAb = _mm_add_ps(vVec, vAb);
	__m128 vHaddCd = _mm_shuffle_ps(vHaddAb, vHaddAb, 0x4E);
	__m128 vHaddAbcd = _mm_add_ps(vHaddAb, vHaddCd);
	return vHaddAbcd;
}
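
// Illustrative sketch (not part of the original header; hypothetical helper):
// after the horizontal add, every lane holds a0+a1+a2+a3, so the total can be
// read back from any lane with _mm_cvtss_f32.
static AkForceInline AkReal32 AkSimdExample_Sum4( AKSIMD_V4F32 in_vec )
{
	return _mm_cvtss_f32( AKSIMD_HORIZONTALADD_V4F32( in_vec ) );
}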

static AkForceInline AKSIMD_V4F32 AKSIMD_DOTPRODUCT( AKSIMD_V4F32 & vVec, const AKSIMD_V4F32 & vfSigns )
{
	AKSIMD_V4F32 vfDotProduct = AKSIMD_MUL_V4F32( vVec, vfSigns );
	return AKSIMD_HORIZONTALADD_V4F32( vfDotProduct );
}

/// Cross-platform SIMD multiplication of 2 complex data elements with interleaved real and imaginary parts
static AkForceInline AKSIMD_V4F32 AKSIMD_COMPLEXMUL_V4F32( const AKSIMD_V4F32 vCIn1, const AKSIMD_V4F32 vCIn2 )
{
	static const AKSIMD_V4F32 vSign = { -0.f, 0.f, -0.f, 0.f };

	AKSIMD_V4F32 vTmp1 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(2,2,0,0));
	vTmp1 = AKSIMD_MUL_V4F32( vTmp1, vCIn2 );
	AKSIMD_V4F32 vTmp2 = AKSIMD_SHUFFLE_V4F32( vCIn1, vCIn1, AKSIMD_SHUFFLE(3,3,1,1));
	vTmp2 = AKSIMD_XOR_V4F32( vTmp2, vSign );
	vTmp2 = AKSIMD_MADD_V4F32( vTmp2, AKSIMD_SHUFFLE_BADC( vCIn2 ), vTmp1 );
	return vTmp2;
}
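
// Illustrative sketch (not part of the original header; hypothetical helper):
// each AKSIMD_V4F32 packs two complex numbers with the real part in the even
// lane and the imaginary part in the odd lane of each pair, so the product
// follows (a+bi)(c+di) = (ac-bd) + (ad+bc)i per pair.
static AkForceInline AKSIMD_V4F32 AkSimdExample_ComplexSquare( AKSIMD_V4F32 in_vC )
{
	return AKSIMD_COMPLEXMUL_V4F32( in_vC, in_vC );
}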

#ifdef AK_SSE3

#include <pmmintrin.h>

/// Cross-platform SIMD multiplication of 2 complex data elements with interleaved real and imaginary parts
static AKSIMD_V4F32 AKSIMD_COMPLEXMUL_SSE3( const AKSIMD_V4F32 vCIn1, const AKSIMD_V4F32 vCIn2 )
{
	AKSIMD_V4F32 vXMM0 = _mm_moveldup_ps(vCIn1);	// multiplier real (a1, a1, a0, a0)
	vXMM0 = AKSIMD_MUL_V4F32(vXMM0, vCIn2);	// temp1 (a1d1, a1c1, a0d0, a0c0)
	AKSIMD_V4F32 xMM1 = _mm_shuffle_ps(vCIn2, vCIn2, 0xB1);	// shuf multiplicand (c1, d1, c0, d0)
	AKSIMD_V4F32 xMM2 = _mm_movehdup_ps(vCIn1);	// multiplier imag (b1, b1, b0, b0)
	xMM2 = AKSIMD_MUL_V4F32( xMM2, xMM1);	// temp2 (b1c1, b1d1, b0c0, b0d0)
	AKSIMD_V4F32 vCOut = _mm_addsub_ps(vXMM0, xMM2);	// (a1d1+b1c1, a1c1-b1d1, a0d0+b0c0, a0c0-b0d0)
	return vCOut;
}

#endif

#if __SSE3__

// Alternately adds and subtracts packed single-precision (32-bit) floating-point elements in a
// to/from packed elements in b, and stores the results in dst.
#define AKSIMD_ADDSUB_V4F32( a, b ) _mm_addsub_ps( a, b)

#else

// Alternately adds and subtracts packed single-precision (32-bit) floating-point elements in a
// to/from packed elements in b, and stores the results in dst.
#define AKSIMD_ADDSUB_V4F32( a, b ) _mm_add_ps( a, _mm_xor_ps(b, AKSIMD_SETV_V4F32(0.f, -0.f, 0.f, -0.f)))

#endif

#if defined _MSC_VER && ( _MSC_VER <= 1600 )
	#define AKSIMD_ASSERTFLUSHZEROMODE AKASSERT( _MM_GET_FLUSH_ZERO_MODE(dummy) == _MM_FLUSH_ZERO_ON )
#elif defined(AK_CPU_X86) || defined(AK_CPU_X86_64)
	#define AKSIMD_ASSERTFLUSHZEROMODE AKASSERT( _MM_GET_FLUSH_ZERO_MODE() == _MM_FLUSH_ZERO_ON )
#else
	#define AKSIMD_ASSERTFLUSHZEROMODE
#endif

//@}
////////////////////////////////////////////////////////////////////////


////////////////////////////////////////////////////////////////////////
/// @name AKSIMD integer arithmetic
//@{

/// Adds the four integer values of a and b
#define AKSIMD_ADD_V4I32( a, b ) _mm_add_epi32( a, b )

#define AKSIMD_CMPLT_V4I32( a, b ) _mm_cmplt_epi32(a,b)
#define AKSIMD_CMPGT_V4I32( a, b ) _mm_cmpgt_epi32(a,b)
#define AKSIMD_OR_V4I32( a, b ) _mm_or_si128(a,b)
#define AKSIMD_XOR_V4I32( a, b ) _mm_xor_si128(a,b)
#define AKSIMD_SUB_V4I32( a, b ) _mm_sub_epi32(a,b)
#define AKSIMD_NOT_V4I32( a ) _mm_xor_si128(a,_mm_set1_epi32(~0))

#define AKSIMD_OR_V4F32( a, b ) _mm_or_ps(a,b)
#define AKSIMD_AND_V4F32( a, b ) _mm_and_ps(a,b)
#define AKSIMD_ANDNOT_V4F32( a, b ) _mm_andnot_ps(a,b)
#define AKSIMD_NOT_V4F32( a ) _mm_xor_ps(a,_mm_castsi128_ps(_mm_set1_epi32(~0)))

#define AKSIMD_OR_V4COND( a, b ) _mm_or_ps(a,b)
#define AKSIMD_AND_V4COND( a, b ) _mm_and_ps(a,b)

/// Multiplies the low 16 bits of a by b and stores it in V4I32 (no overflow)
#define AKSIMD_MULLO16_V4I32( a , b) _mm_mullo_epi16(a, b)

/// Multiplies the low 32 bits of a by b and stores it in V4I32 (no overflow)
static AkForceInline AKSIMD_V4I32 AKSIMD_MULLO_V4I32( const AKSIMD_V4I32 vIn1, const AKSIMD_V4I32 vIn2 )
{
#ifdef __SSE4_1__ // use SSE 4.1 version directly where possible
	return _mm_mullo_epi32(vIn1, vIn2);
#else // use SSE 2 otherwise
	__m128i tmp1 = _mm_mul_epu32(vIn1, vIn2); // mul 2,0
	__m128i tmp2 = _mm_mul_epu32(_mm_srli_si128(vIn1, 4), _mm_srli_si128(vIn2, 4)); // mul 3,1
	return _mm_unpacklo_epi32(_mm_shuffle_epi32(tmp1, _MM_SHUFFLE(0, 0, 2, 0)), _mm_shuffle_epi32(tmp2, _MM_SHUFFLE(0, 0, 2, 0))); // shuffle results to [63..0] and pack
#endif
}
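
// Illustrative sketch (not part of the original header; hypothetical helper):
// the lane-wise 32-bit multiply keeps only the low 32 bits of each 64-bit
// product, so results wrap modulo 2^32; e.g. 0x10000 * 0x10000 yields 0.
static AkForceInline AKSIMD_V4I32 AkSimdExample_SquareLanes( AKSIMD_V4I32 in_v )
{
	return AKSIMD_MULLO_V4I32( in_v, in_v );
}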

//@}
////////////////////////////////////////////////////////////////////////


////////////////////////////////////////////////////////////////////////
/// @name AKSIMD packing / unpacking
//@{

/// Selects and interleaves the lower two single-precision, floating-point
/// values from a and b (see _mm_unpacklo_ps)
#define AKSIMD_UNPACKLO_V4F32( a, b ) _mm_unpacklo_ps( a, b )

/// Selects and interleaves the upper two single-precision, floating-point
/// values from a and b (see _mm_unpackhi_ps)
#define AKSIMD_UNPACKHI_V4F32( a, b ) _mm_unpackhi_ps( a, b )

// Given four pointers, gathers 32 bits of data from each location,
// deinterleaves them as two 16-bit values each, and sign-extends to 32 bits
// e.g. (*addr[0]) := (b a)
// e.g. (*addr[1]) := (d c)
// e.g. (*addr[2]) := (f e)
// e.g. (*addr[3]) := (h g)
// return struct has
// val[0] := (g e c a)
// val[1] := (h f d b)
static AkForceInline AKSIMD_V4I32X2 AKSIMD_GATHER_V4I32_AND_DEINTERLEAVE_V4I32X2( AkInt16* addr3, AkInt16* addr2, AkInt16* addr1, AkInt16* addr0 )
{
	__m128i data[4] = {
		_mm_set1_epi32(*(AkInt32*)addr0),
		_mm_set1_epi32(*(AkInt32*)addr1),
		_mm_set1_epi32(*(AkInt32*)addr2),
		_mm_set1_epi32(*(AkInt32*)addr3),
	};

	__m128i group[2] = {
		_mm_unpacklo_epi32(data[0], data[1]),
		_mm_unpacklo_epi32(data[2], data[3]),
	};

	__m128i shuffle = _mm_unpacklo_epi64(group[0], group[1]);

	AKSIMD_V4I32X2 ret{
		_mm_srai_epi32(_mm_slli_epi32(shuffle, 16), 16),
		_mm_srai_epi32(shuffle, 16)
	};
	return ret;
}

// Given four pointers, gathers 64 bits of data from each location,
// deinterleaves them as four 16-bit values each, and sign-extends to 32 bits
// e.g. (*addr[0]) := (d c b a)
// e.g. (*addr[1]) := (h g f e)
// e.g. (*addr[2]) := (l k j i)
// e.g. (*addr[3]) := (p o n m)
// return struct has
// val[0] := (m i e a)
// val[1] := (n j f b)
// val[2] := (o k g c)
// val[3] := (p l h d)

static AkForceInline AKSIMD_V4I32X4 AKSIMD_GATHER_V4I64_AND_DEINTERLEAVE_V4I32X4( AkInt16* addr3, AkInt16* addr2, AkInt16* addr1, AkInt16* addr0 )
{
	__m128i data[4] = {
		_mm_set1_epi64x(*(AkInt64*)addr0),
		_mm_set1_epi64x(*(AkInt64*)addr1),
		_mm_set1_epi64x(*(AkInt64*)addr2),
		_mm_set1_epi64x(*(AkInt64*)addr3),
	};

	__m128i group[2] = {
		_mm_unpacklo_epi64(data[0], data[1]),
		_mm_unpacklo_epi64(data[2], data[3]),
	};

	__m128i shuffle[2] = {
		_mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(group[0]), _mm_castsi128_ps(group[1]), 0x88)),
		_mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(group[0]), _mm_castsi128_ps(group[1]), 0xDD)),
	};

	AKSIMD_V4I32X4 ret{
		_mm_srai_epi32(_mm_slli_epi32(shuffle[0],16),16),
		_mm_srai_epi32(shuffle[0],16),
		_mm_srai_epi32(_mm_slli_epi32(shuffle[1],16),16),
		_mm_srai_epi32(shuffle[1],16),
	};
	return ret;
}
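
// Illustrative sketch (not part of the original header; hypothetical helper):
// gathering four frames of 16-bit interleaved stereo. Each address reads one
// 32-bit frame (L,R), so val[0] receives the four left samples and val[1] the
// four right samples, sign-extended to 32 bits.
static AkForceInline AKSIMD_V4I32X2 AkSimdExample_GatherStereoFrames( AkInt16* in_pSamples, int i0, int i1, int i2, int i3 )
{
	return AKSIMD_GATHER_V4I32_AND_DEINTERLEAVE_V4I32X2(
		in_pSamples + 2 * i3, in_pSamples + 2 * i2,
		in_pSamples + 2 * i1, in_pSamples + 2 * i0 );
}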

//@}
////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////
/// @name AKSIMD vector comparison
/// Apart from AKSIMD_SEL_GTEQ_V4F32, these implementations are limited to a few platforms.
//@{

#define AKSIMD_CMP_CTRLMASK __m128

/// Vector "<=" operation (see _mm_cmple_ps)
#define AKSIMD_LTEQ_V4F32( __a__, __b__ ) _mm_cmple_ps( (__a__), (__b__) )

#define AKSIMD_LT_V4F32( __a__, __b__ ) _mm_cmplt_ps( (__a__), (__b__) )

/// Vector ">=" operation (see _mm_cmpge_ps)
#define AKSIMD_GTEQ_V4F32( __a__, __b__ ) _mm_cmpge_ps( (__a__), (__b__) )

#define AKSIMD_GT_V4F32( __a__, __b__ ) _mm_cmpgt_ps( (__a__), (__b__) )

/// Vector "==" operation (see _mm_cmpeq_ps)
#define AKSIMD_EQ_V4F32( __a__, __b__ ) _mm_cmpeq_ps( (__a__), (__b__) )

/// Returns a where the control mask is 0 and b where the control mask is
/// non-zero. The control mask is usually produced by the comparison operations above.
static AkForceInline AKSIMD_V4F32 AKSIMD_VSEL_V4F32( AKSIMD_V4F32 vA, AKSIMD_V4F32 vB, AKSIMD_V4F32 vMask )
{
#if defined(__SSE4_1__)
	return _mm_blendv_ps(vA, vB, vMask);
#else
	vB = _mm_and_ps( vB, vMask );
	vA = _mm_andnot_ps( vMask, vA );
	return _mm_or_ps( vA, vB );
#endif
}

// (cond1 >= cond2) ? b : a.
#define AKSIMD_SEL_GTEQ_V4F32( __a__, __b__, __cond1__, __cond2__ ) AKSIMD_VSEL_V4F32( __a__, __b__, AKSIMD_GTEQ_V4F32( __cond1__, __cond2__ ) )

// a >= 0 ? b : c (the arguments read like the C++ ternary operator)
#define AKSIMD_SEL_GTEZ_V4F32( __a__, __b__, __c__ ) AKSIMD_VSEL_V4F32( (__c__), (__b__), AKSIMD_GTEQ_V4F32( __a__, _mm_set1_ps(0) ) )

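// Illustrative sketch (not part of the original header; hypothetical helper):
// a branchless per-lane clamp built from the comparison and select primitives.
static AkForceInline AKSIMD_V4F32 AkSimdExample_Clamp( AKSIMD_V4F32 in_v, AKSIMD_V4F32 in_vMin, AKSIMD_V4F32 in_vMax )
{
	// Where v < min, take min; where v > max, take max; otherwise keep v.
	AKSIMD_V4F32 v = AKSIMD_VSEL_V4F32( in_v, in_vMin, AKSIMD_LT_V4F32( in_v, in_vMin ) );
	return AKSIMD_VSEL_V4F32( v, in_vMax, AKSIMD_GT_V4F32( v, in_vMax ) );
}
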
#define AKSIMD_SPLAT_V4F32(var, idx) AKSIMD_SHUFFLE_V4F32(var,var, AKSIMD_SHUFFLE(idx,idx,idx,idx))

#define AKSIMD_MASK_V4F32( __a__ ) _mm_movemask_ps( __a__ )

// returns true if every element of the provided vector is zero
static AkForceInline bool AKSIMD_TESTZERO_V4I32( AKSIMD_V4I32 a )
{
	return _mm_movemask_epi8(_mm_cmpeq_epi32(a, _mm_setzero_si128())) == 0xFFFF;
}
#define AKSIMD_TESTZERO_V4F32( __a__ ) AKSIMD_TESTZERO_V4I32(_mm_castps_si128(__a__))
#define AKSIMD_TESTZERO_V4COND( __a__ ) AKSIMD_TESTZERO_V4F32(__a__)

// returns true if every element of the provided vector is ~0 (all bits set)
static AkForceInline bool AKSIMD_TESTONES_V4I32( AKSIMD_V4I32 a )
{
	return _mm_movemask_epi8(_mm_cmpeq_epi32(a, _mm_set1_epi32(~0))) == 0xFFFF;
}
#define AKSIMD_TESTONES_V4F32( __a__ ) AKSIMD_TESTONES_V4I32(_mm_castps_si128(__a__))
#define AKSIMD_TESTONES_V4COND( __a__ ) AKSIMD_TESTONES_V4F32(__a__)

//@}
////////////////////////////////////////////////////////////////////////

/// Loads unaligned 128-bit value (see _mm_loadu_si128)
#define AKSIMD_LOADU_V4I32( __addr__ ) _mm_loadu_si128( (__addr__) )

/// Loads a 128-bit value; alignment is not required, as the implementation
/// uses an unaligned load (see _mm_loadu_si128)
#define AKSIMD_LOAD_V4I32( __addr__ ) _mm_loadu_si128( (__addr__) )

/// Sets the four 32-bit integer values to zero (see _mm_setzero_si128)
#define AKSIMD_SETZERO_V4I32() _mm_setzero_si128()

#define AKSIMD_SET_V4I32( __scalar__ ) _mm_set1_epi32( (__scalar__) )

#define AKSIMD_SETV_V4I32( _d, _c, _b, _a ) _mm_set_epi32( (_d), (_c), (_b), (_a) )

#define AKSIMD_SETV_V2I64( _b, _a ) _mm_set_epi64x( (_b), (_a) )

/// Sets the 32b integer i at the location specified by index in a
#define AKSIMD_INSERT_V4I32( a, i, index) _mm_insert_epi32(a, i, index)

/// Sets the 64b integer i at the location specified by index in a
#define AKSIMD_INSERT_V2I64( a, i, index) _mm_insert_epi64(a, i, index)

/// Stores four 32-bit integer values. The address
/// does not need to be 16-byte aligned (see _mm_storeu_si128).
#define AKSIMD_STORE_V4I32( __addr__, __vec__ ) _mm_storeu_si128( (__m128i*)(__addr__), (__vec__) )

/// Stores four 32-bit integer values. The address
/// does not need to be 16-byte aligned (see _mm_storeu_si128).
#define AKSIMD_STOREU_V4I32( __addr__, __vec__ ) _mm_storeu_si128( (__m128i*)(__addr__), (__vec__) )

////////////////////////////////////////////////////////////////////////
/// @name AKSIMD conversion
//@{

/// Converts the four signed 32-bit integer values of a to single-precision,
/// floating-point values (see _mm_cvtepi32_ps)
#define AKSIMD_CONVERT_V4I32_TO_V4F32( __vec__ ) _mm_cvtepi32_ps( (__vec__) )

/// Converts the four single-precision, floating-point values of a to signed
/// 32-bit integer values by rounding (see _mm_cvtps_epi32)
#define AKSIMD_ROUND_V4F32_TO_V4I32( __vec__ ) _mm_cvtps_epi32( (__vec__) )

/// Converts the four single-precision, floating-point values of a to signed
/// 32-bit integer values by truncating (see _mm_cvttps_epi32)
#define AKSIMD_TRUNCATE_V4F32_TO_V4I32( __vec__ ) _mm_cvttps_epi32( (__vec__) )

/// Computes the bitwise AND of the 128-bit value in a and the
/// 128-bit value in b (see _mm_and_si128)
#define AKSIMD_AND_V4I32( __a__, __b__ ) _mm_and_si128( (__a__), (__b__) )

/// Compares the 8 signed 16-bit integers in a and the 8 signed
/// 16-bit integers in b for greater than (see _mm_cmpgt_epi16)
#define AKSIMD_CMPGT_V8I16( __a__, __b__ ) _mm_cmpgt_epi16( (__a__), (__b__) )

/// Converts the 4 half-precision floats in the lower 64 bits of the provided
/// vector to 4 full-precision floats
#define AKSIMD_CONVERT_V4F16_TO_V4F32_LO(__vec__) AKSIMD_CONVERT_V4F16_TO_V4F32_HELPER( _mm_unpacklo_epi16(_mm_setzero_si128(), __vec__))

/// Converts the 4 half-precision floats in the upper 64 bits of the provided
/// vector to 4 full-precision floats
#define AKSIMD_CONVERT_V4F16_TO_V4F32_HI(__vec__) AKSIMD_CONVERT_V4F16_TO_V4F32_HELPER( _mm_unpackhi_epi16(_mm_setzero_si128(), __vec__))

static AkForceInline AKSIMD_V4F32 AKSIMD_CONVERT_V4F16_TO_V4F32_HELPER( AKSIMD_V4I32 vec )
{
	__m128i expMantData = _mm_and_si128(vec, _mm_set1_epi32(0x7fff0000));
	__m128i expMantShifted = _mm_srli_epi32(expMantData, 3); // shift so that the float16 exp/mant is now split along float32's bounds

	// magic number to scale the fp16 exp range into the fp32 exp range (also renormalizes any denorms)
	__m128i expMantFloat = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(expMantShifted), _mm_castsi128_ps(_mm_set1_epi32(0x77800000))));

	// if the fp16 val was inf or nan, preserve the inf/nan exponent field (we can just 'or' the new inf-bits into the attempt at scaling from inf previously)
	__m128i infnanCheck = _mm_cmpgt_epi32(expMantData, _mm_set1_epi32(0x7bffffff));
	__m128i infnanExp = _mm_and_si128(infnanCheck, _mm_set1_epi32(255 << 23));
	__m128i expMantWithInfNan = _mm_or_si128(expMantFloat, infnanExp);

	// reincorporate the sign
	__m128i signData = _mm_and_si128(vec, _mm_set1_epi32(0x80000000));
	__m128 assembledFloat = _mm_castsi128_ps(_mm_or_si128(signData, expMantWithInfNan));
	return assembledFloat;
}

/// Converts the 4 full-precision floats vector to 4 half-precision floats
/// occupying the lower bits and leaving the upper bits as zero
static AkForceInline AKSIMD_V4I32 AKSIMD_CONVERT_V4F32_TO_V4F16( AKSIMD_V4F32 vec )
{
	__m128i signData = _mm_and_si128(_mm_castps_si128(vec), _mm_set1_epi32(0x80000000));
	__m128i unsignedVec = _mm_and_si128(_mm_castps_si128(vec), _mm_set1_epi32(0x7fffffff));

	// do the processing for values that will be denormed in float16
	// Add 0.5 to get the value within range and round; then move the mantissa data up
	__m128 denormedVec = _mm_add_ps(_mm_castsi128_ps(unsignedVec), _mm_set1_ps(0.5f));
	__m128i denormResult = _mm_slli_epi32(_mm_castps_si128(denormedVec), 16);

	// processing for values that will be normal in float16
	__m128i subnormMagic = _mm_set1_epi32(0xC8000FFF); // -131072 + rounding bias
	__m128i normRoundPart1 = _mm_add_epi32(unsignedVec, subnormMagic);
	__m128i mantLsb = _mm_slli_epi32(unsignedVec, 31 - 13);
	__m128i mantSignExtendLsb = _mm_srai_epi32(mantLsb, 31); // extend the LSB so that it's -1 when set
	__m128i normRoundPart2 = _mm_sub_epi32(normRoundPart1, mantSignExtendLsb); // and subtract the sign-extended bit to finish rounding up
	__m128i normResult = _mm_slli_epi32(normRoundPart2, 3);

	// Combine the norm and subnorm paths together
	__m128i normalMinimum = _mm_set1_epi32((127 - 14) << 23); // smallest float32 that yields a normalized float16
	__m128i denormMask = _mm_cmpgt_epi32(normalMinimum, unsignedVec);

	__m128i nonNanFloat = _mm_or_si128(_mm_and_si128(denormMask, denormResult), _mm_andnot_si128(denormMask, normResult));

	// apply inf/nan check
	__m128i isNotInfNanMask = _mm_cmplt_epi32(unsignedVec, _mm_set1_epi32(0x47800000)); // true where the magnitude stays below the float16 inf/nan range
	__m128i mantissaData = _mm_and_si128(unsignedVec, _mm_set1_epi32(0x007fffff));
	__m128i isNanMask = _mm_cmpgt_epi32(unsignedVec, _mm_set1_epi32(0x7F800000)); // mark the parts of the vector where we have a mantissa (i.e. NAN) as 0xffffffff
	__m128i nantissaBit = _mm_and_si128(isNanMask, _mm_set1_epi32(0x02000000)); // set the NaN mantissa bit if the mantissa suggests this is NaN
	__m128i infData = _mm_andnot_si128(mantissaData, _mm_set1_epi32(0x7c000000)); // grab the exponent data from the unsigned vec with no mantissa
	__m128i infNanFloat = _mm_or_si128(infData, nantissaBit); // if we have a non-zero mantissa, add the NaN mantissa bit

	__m128i resultWithInfNan = _mm_or_si128(_mm_and_si128(isNotInfNanMask, nonNanFloat), _mm_andnot_si128(isNotInfNanMask, infNanFloat));

	// reincorporate the original sign
	__m128i signedResult = _mm_or_si128(signData, resultWithInfNan);

	// store results packed in the lower 64 bits, and set the upper 64 to zero
	__m128i resultEpi16Lo = _mm_shufflelo_epi16(signedResult, 0xD); // move 16b ints (x,x,x,x,d,c,b,a) down to (x,x,x,x,x,x,d,b)
	__m128i resultEpi16Hi = _mm_shufflehi_epi16(signedResult, 0xD); // move 16b ints (h,g,f,e,x,x,x,x) down to (x,x,h,f,x,x,x,x)
	__m128 resultEpi16 = _mm_shuffle_ps(_mm_castsi128_ps(resultEpi16Lo), _mm_castsi128_ps(resultEpi16Hi), 0xE4); // combine - (x,x,h,f,x,x,d,b)
	__m128i result = _mm_castps_si128(_mm_shuffle_ps(resultEpi16, _mm_setzero_ps(), 0x8)); // reshuffle with zero - (0,0,0,0,h,f,d,b)

	return result;
}
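
// Illustrative sketch (not part of the original header; hypothetical helper):
// a float32 -> float16 -> float32 round trip. The packed halves land in the
// lower 64 bits of the integer vector, so the matching expansion is the _LO
// variant of the converter.
static AkForceInline AKSIMD_V4F32 AkSimdExample_QuantizeToHalf( AKSIMD_V4F32 in_vec )
{
	AKSIMD_V4I32 vHalf = AKSIMD_CONVERT_V4F32_TO_V4F16( in_vec );
	return AKSIMD_CONVERT_V4F16_TO_V4F32_LO( vHalf );
}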

//@}
////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////
/// @name AKSIMD cast
//@{

/// Cast vector of type AKSIMD_V2F64 to type AKSIMD_V4F32. This intrinsic is only
/// used for compilation and does not generate any instructions, thus it has zero latency.
#define AKSIMD_CAST_V2F64_TO_V4F32( __vec__ ) _mm_castpd_ps(__vec__)

/// Cast vector of type AKSIMD_V2F64 to type AKSIMD_V4I32. This intrinsic is only
/// used for compilation and does not generate any instructions, thus it has zero latency.
#define AKSIMD_CAST_V2F64_TO_V4I32( __vec__ ) _mm_castpd_si128(__vec__)

/// Cast vector of type AKSIMD_V4F32 to type AKSIMD_V2F64. This intrinsic is only
/// used for compilation and does not generate any instructions, thus it has zero latency.
#define AKSIMD_CAST_V4F32_TO_V2F64( __vec__ ) _mm_castps_pd(__vec__)

/// Cast vector of type AKSIMD_V4F32 to type AKSIMD_V4I32. This intrinsic is only
/// used for compilation and does not generate any instructions, thus it has zero latency.
#define AKSIMD_CAST_V4F32_TO_V4I32( __vec__ ) _mm_castps_si128(__vec__)

/// Cast vector of type AKSIMD_V4I32 to type AKSIMD_V2F64. This intrinsic is only
/// used for compilation and does not generate any instructions, thus it has zero latency.
#define AKSIMD_CAST_V4I32_TO_V2F64( __vec__ ) _mm_castsi128_pd(__vec__)

/// Cast vector of type AKSIMD_V4I32 to type AKSIMD_V4F32. This intrinsic is only
/// used for compilation and does not generate any instructions, thus it has zero latency.
#define AKSIMD_CAST_V4I32_TO_V4F32( __vec__ ) _mm_castsi128_ps(__vec__)

/// Cast vector of type AKSIMD_V4COND to AKSIMD_V4F32.
#define AKSIMD_CAST_V4COND_TO_V4F32( __vec__ ) (__vec__)

/// Cast vector of type AKSIMD_V4F32 to AKSIMD_V4COND.
#define AKSIMD_CAST_V4F32_TO_V4COND( __vec__ ) (__vec__)

/// Cast vector of type AKSIMD_V4COND to AKSIMD_V4I32.
#define AKSIMD_CAST_V4COND_TO_V4I32( __vec__ ) _mm_castps_si128(__vec__)

/// Cast vector of type AKSIMD_V4I32 to AKSIMD_V4COND.
#define AKSIMD_CAST_V4I32_TO_V4COND( __vec__ ) _mm_castsi128_ps(__vec__)

//@}
////////////////////////////////////////////////////////////////////////

/// Interleaves the lower 4 signed or unsigned 16-bit integers in a with
/// the lower 4 signed or unsigned 16-bit integers in b (see _mm_unpacklo_epi16)
#define AKSIMD_UNPACKLO_VECTOR8I16( a, b ) _mm_unpacklo_epi16( a, b )

/// Interleaves the upper 4 signed or unsigned 16-bit integers in a with
/// the upper 4 signed or unsigned 16-bit integers in b (see _mm_unpackhi_epi16)
#define AKSIMD_UNPACKHI_VECTOR8I16( a, b ) _mm_unpackhi_epi16( a, b )

/// Packs the 8 signed 32-bit integers from a and b into signed 16-bit
/// integers and saturates (see _mm_packs_epi32)
#define AKSIMD_PACKS_V4I32( a, b ) _mm_packs_epi32( a, b )

////////////////////////////////////////////////////////////////////////
/// @name AKSIMD shifting
//@{

/// Shifts the 4 signed or unsigned 32-bit integers in a left by
/// in_shiftBy bits while shifting in zeros (see _mm_slli_epi32)
#define AKSIMD_SHIFTLEFT_V4I32( __vec__, __shiftBy__ ) \
	_mm_slli_epi32( (__vec__), (__shiftBy__) )

/// Shifts the 4 signed or unsigned 32-bit integers in a right by
/// in_shiftBy bits while shifting in zeros (see _mm_srli_epi32)
#define AKSIMD_SHIFTRIGHT_V4I32( __vec__, __shiftBy__ ) \
	_mm_srli_epi32( (__vec__), (__shiftBy__) )

/// Shifts the 4 signed 32-bit integers in a right by in_shiftBy
/// bits while shifting in the sign bit (see _mm_srai_epi32)
#define AKSIMD_SHIFTRIGHTARITH_V4I32( __vec__, __shiftBy__ ) \
	_mm_srai_epi32( (__vec__), (__shiftBy__) )

//@}
////////////////////////////////////////////////////////////////////////

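// Illustrative sketch (not part of the original header; hypothetical helper):
// converting 8 float samples (assumed pre-scaled to the int16 range) to
// saturated signed 16-bit PCM with round + pack, a common final stage in
// audio output paths.
static AkForceInline AKSIMD_V4I32 AkSimdExample_FloatsToPcm16( AKSIMD_V4F32 in_vA, AKSIMD_V4F32 in_vB )
{
	AKSIMD_V4I32 viA = AKSIMD_ROUND_V4F32_TO_V4I32( in_vA );
	AKSIMD_V4I32 viB = AKSIMD_ROUND_V4F32_TO_V4I32( in_vB );
	return AKSIMD_PACKS_V4I32( viA, viB ); // 8 x int16, saturated
}
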
#if defined( AK_CPU_X86 ) /// MMX

typedef __m64 AKSIMD_V2F32;	///< Vector of 2 32-bit floats

#define AKSIMD_SETZERO_V2F32() _mm_setzero_si64()

#define AKSIMD_CMPGT_V2I32( a, b ) _mm_cmpgt_pi16(a,b)

/// Interleaves the lower 2 signed or unsigned 16-bit integers in a with
/// the lower 2 signed or unsigned 16-bit integers in b (see _mm_unpacklo_pi16)
#define AKSIMD_UNPACKLO_VECTOR4I16( a, b ) _mm_unpacklo_pi16( a, b )

/// Interleaves the upper 2 signed or unsigned 16-bit integers in a with
/// the upper 2 signed or unsigned 16-bit integers in b (see _mm_unpackhi_pi16)
#define AKSIMD_UNPACKHI_VECTOR4I16( a, b ) _mm_unpackhi_pi16( a, b )

/// Shifts the 2 signed or unsigned 32-bit integers in a left by
/// in_shiftBy bits while shifting in zeros (see _mm_slli_pi32)
#define AKSIMD_SHIFTLEFT_V2I32( __vec__, __shiftBy__ ) \
	_mm_slli_pi32( (__vec__), (__shiftBy__) )

/// Shifts the 2 signed 32-bit integers in a right by in_shiftBy
/// bits while shifting in the sign bit (see _mm_srai_pi32)
#define AKSIMD_SHIFTRIGHTARITH_V2I32( __vec__, __shiftBy__ ) \
	_mm_srai_pi32( (__vec__), (__shiftBy__) )

/// Used when ending a block of code that utilizes any MMX construct on x86 code
/// so that the x87 FPU can be used again
#define AKSIMD_MMX_EMPTY _mm_empty()

#endif