Bullet Collision Detection & Physics Library
btMatrix3x3.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
3 
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9 
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 
15 
16 #ifndef BT_MATRIX3x3_H
17 #define BT_MATRIX3x3_H
18 
19 #include "btVector3.h"
20 #include "btQuaternion.h"
21 #include <stdio.h>
22 
23 #ifdef BT_USE_SSE
24 //const __m128 ATTRIBUTE_ALIGNED16(v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
25 //const __m128 ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
26 #define vMPPP (_mm_set_ps (+0.0f, +0.0f, +0.0f, -0.0f))
27 #endif
28 
29 #if defined(BT_USE_SSE)
30 #define v1000 (_mm_set_ps(0.0f,0.0f,0.0f,1.0f))
31 #define v0100 (_mm_set_ps(0.0f,0.0f,1.0f,0.0f))
32 #define v0010 (_mm_set_ps(0.0f,1.0f,0.0f,0.0f))
33 #elif defined(BT_USE_NEON)
34 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
35 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
36 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
37 #endif
38 
39 #ifdef BT_USE_DOUBLE_PRECISION
40 #define btMatrix3x3Data btMatrix3x3DoubleData
41 #else
42 #define btMatrix3x3Data btMatrix3x3FloatData
43 #endif //BT_USE_DOUBLE_PRECISION
44 
45 
49 
51  btVector3 m_el[3];
52 
53 public:
56 
57  // explicit btMatrix3x3(const btScalar *m) { setFromOpenGLSubMatrix(m); }
58 
60  explicit btMatrix3x3(const btQuaternion& q) { setRotation(q); }
61  /*
62  template <typename btScalar>
63  Matrix3x3(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
64  {
65  setEulerYPR(yaw, pitch, roll);
66  }
67  */
69  btMatrix3x3(const btScalar& xx, const btScalar& xy, const btScalar& xz,
70  const btScalar& yx, const btScalar& yy, const btScalar& yz,
71  const btScalar& zx, const btScalar& zy, const btScalar& zz)
72  {
73  setValue(xx, xy, xz,
74  yx, yy, yz,
75  zx, zy, zz);
76  }
77 
78 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
79  SIMD_FORCE_INLINE btMatrix3x3 (const btSimdFloat4 v0, const btSimdFloat4 v1, const btSimdFloat4 v2 )
80  {
81  m_el[0].mVec128 = v0;
82  m_el[1].mVec128 = v1;
83  m_el[2].mVec128 = v2;
84  }
85 
86  SIMD_FORCE_INLINE btMatrix3x3 (const btVector3& v0, const btVector3& v1, const btVector3& v2 )
87  {
88  m_el[0] = v0;
89  m_el[1] = v1;
90  m_el[2] = v2;
91  }
92 
93  // Copy constructor
95  {
96  m_el[0].mVec128 = rhs.m_el[0].mVec128;
97  m_el[1].mVec128 = rhs.m_el[1].mVec128;
98  m_el[2].mVec128 = rhs.m_el[2].mVec128;
99  }
100 
101  // Assignment Operator
102  SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& m)
103  {
104  m_el[0].mVec128 = m.m_el[0].mVec128;
105  m_el[1].mVec128 = m.m_el[1].mVec128;
106  m_el[2].mVec128 = m.m_el[2].mVec128;
107 
108  return *this;
109  }
110 
111 #else
112 
115  {
116  m_el[0] = other.m_el[0];
117  m_el[1] = other.m_el[1];
118  m_el[2] = other.m_el[2];
119  }
120 
123  {
124  m_el[0] = other.m_el[0];
125  m_el[1] = other.m_el[1];
126  m_el[2] = other.m_el[2];
127  return *this;
128  }
129 
130 #endif
131 
135  {
136  return btVector3(m_el[0][i],m_el[1][i],m_el[2][i]);
137  }
138 
139 
142  SIMD_FORCE_INLINE const btVector3& getRow(int i) const
143  {
144  btFullAssert(0 <= i && i < 3);
145  return m_el[i];
146  }
147 
151  {
152  btFullAssert(0 <= i && i < 3);
153  return m_el[i];
154  }
155 
159  {
160  btFullAssert(0 <= i && i < 3);
161  return m_el[i];
162  }
163 
167  btMatrix3x3& operator*=(const btMatrix3x3& m);
168 
172  btMatrix3x3& operator+=(const btMatrix3x3& m);
173 
177  btMatrix3x3& operator-=(const btMatrix3x3& m);
178 
182  {
183  m_el[0].setValue(m[0],m[4],m[8]);
184  m_el[1].setValue(m[1],m[5],m[9]);
185  m_el[2].setValue(m[2],m[6],m[10]);
186 
187  }
198  void setValue(const btScalar& xx, const btScalar& xy, const btScalar& xz,
199  const btScalar& yx, const btScalar& yy, const btScalar& yz,
200  const btScalar& zx, const btScalar& zy, const btScalar& zz)
201  {
202  m_el[0].setValue(xx,xy,xz);
203  m_el[1].setValue(yx,yy,yz);
204  m_el[2].setValue(zx,zy,zz);
205  }
206 
209  void setRotation(const btQuaternion& q)
210  {
211  btScalar d = q.length2();
212  btFullAssert(d != btScalar(0.0));
213  btScalar s = btScalar(2.0) / d;
214 
215  #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
216  __m128 vs, Q = q.get128();
217  __m128i Qi = btCastfTo128i(Q);
218  __m128 Y, Z;
219  __m128 V1, V2, V3;
220  __m128 V11, V21, V31;
221  __m128 NQ = _mm_xor_ps(Q, btvMzeroMask);
222  __m128i NQi = btCastfTo128i(NQ);
223 
224  V1 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,2,3))); // Y X Z W
225  V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0,0,1,3)); // -X -X Y W
226  V3 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(2,1,0,3))); // Z Y X W
227  V1 = _mm_xor_ps(V1, vMPPP); // change the sign of the first element
228 
229  V11 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,1,0,3))); // Y Y X W
230  V21 = _mm_unpackhi_ps(Q, Q); // Z Z W W
231  V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0,2,0,3)); // X Z -X -W
232 
233  V2 = V2 * V1; //
234  V1 = V1 * V11; //
235  V3 = V3 * V31; //
236 
237  V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2,3,1,3)); // -Z -W Y W
238  V11 = V11 * V21; //
239  V21 = _mm_xor_ps(V21, vMPPP); // change the sign of the first element
240  V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3,3,1,3)); // W W -Y -W
241  V31 = _mm_xor_ps(V31, vMPPP); // change the sign of the first element
242  Y = btCastiTo128f(_mm_shuffle_epi32 (NQi, BT_SHUFFLE(3,2,0,3))); // -W -Z -X -W
243  Z = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,1,3))); // Y X Y W
244 
245  vs = _mm_load_ss(&s);
246  V21 = V21 * Y;
247  V31 = V31 * Z;
248 
249  V1 = V1 + V11;
250  V2 = V2 + V21;
251  V3 = V3 + V31;
252 
253  vs = bt_splat3_ps(vs, 0);
254  // s ready
255  V1 = V1 * vs;
256  V2 = V2 * vs;
257  V3 = V3 * vs;
258 
259  V1 = V1 + v1000;
260  V2 = V2 + v0100;
261  V3 = V3 + v0010;
262 
263  m_el[0] = V1;
264  m_el[1] = V2;
265  m_el[2] = V3;
266  #else
267  btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
268  btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
269  btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
270  btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
271  setValue(
272  btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
273  xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
274  xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
275  #endif
276  }
277 
278 
284  void setEulerYPR(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
285  {
286  setEulerZYX(roll, pitch, yaw);
287  }
288 
298  void setEulerZYX(btScalar eulerX,btScalar eulerY,btScalar eulerZ) {
300  btScalar ci ( btCos(eulerX));
301  btScalar cj ( btCos(eulerY));
302  btScalar ch ( btCos(eulerZ));
303  btScalar si ( btSin(eulerX));
304  btScalar sj ( btSin(eulerY));
305  btScalar sh ( btSin(eulerZ));
306  btScalar cc = ci * ch;
307  btScalar cs = ci * sh;
308  btScalar sc = si * ch;
309  btScalar ss = si * sh;
310 
311  setValue(cj * ch, sj * sc - cs, sj * cc + ss,
312  cj * sh, sj * ss + cc, sj * cs - sc,
313  -sj, cj * si, cj * ci);
314  }
315 
317  void setIdentity()
318  {
319 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
320  m_el[0] = v1000;
321  m_el[1] = v0100;
322  m_el[2] = v0010;
323 #else
324  setValue(btScalar(1.0), btScalar(0.0), btScalar(0.0),
325  btScalar(0.0), btScalar(1.0), btScalar(0.0),
326  btScalar(0.0), btScalar(0.0), btScalar(1.0));
327 #endif
328  }
329 
330  static const btMatrix3x3& getIdentity()
331  {
332 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
333  static const btMatrix3x3
334  identityMatrix(v1000, v0100, v0010);
335 #else
336  static const btMatrix3x3
337  identityMatrix(
338  btScalar(1.0), btScalar(0.0), btScalar(0.0),
339  btScalar(0.0), btScalar(1.0), btScalar(0.0),
340  btScalar(0.0), btScalar(0.0), btScalar(1.0));
341 #endif
342  return identityMatrix;
343  }
344 
347  void getOpenGLSubMatrix(btScalar *m) const
348  {
349 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
350  __m128 v0 = m_el[0].mVec128;
351  __m128 v1 = m_el[1].mVec128;
352  __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
353  __m128 *vm = (__m128 *)m;
354  __m128 vT;
355 
356  v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
357 
358  vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
359  v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
360 
361  v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) ); // y0 y1 y2 0
362  v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) ); // x0 x1 x2 0
363  v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
364 
365  vm[0] = v0;
366  vm[1] = v1;
367  vm[2] = v2;
368 #elif defined(BT_USE_NEON)
369  // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
370  static const uint32x2_t zMask = (const uint32x2_t) {static_cast<uint32_t>(-1), 0 };
371  float32x4_t *vm = (float32x4_t *)m;
372  float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
373  float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
374  float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
375  float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
376  float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
377  float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q ); // z0 z1 z2 0
378 
379  vm[0] = v0;
380  vm[1] = v1;
381  vm[2] = v2;
382 #else
383  m[0] = btScalar(m_el[0].x());
384  m[1] = btScalar(m_el[1].x());
385  m[2] = btScalar(m_el[2].x());
386  m[3] = btScalar(0.0);
387  m[4] = btScalar(m_el[0].y());
388  m[5] = btScalar(m_el[1].y());
389  m[6] = btScalar(m_el[2].y());
390  m[7] = btScalar(0.0);
391  m[8] = btScalar(m_el[0].z());
392  m[9] = btScalar(m_el[1].z());
393  m[10] = btScalar(m_el[2].z());
394  m[11] = btScalar(0.0);
395 #endif
396  }
397 
400  void getRotation(btQuaternion& q) const
401  {
402 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
403  btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
404  btScalar s, x;
405 
406  union {
407  btSimdFloat4 vec;
408  btScalar f[4];
409  } temp;
410 
411  if (trace > btScalar(0.0))
412  {
413  x = trace + btScalar(1.0);
414 
415  temp.f[0]=m_el[2].y() - m_el[1].z();
416  temp.f[1]=m_el[0].z() - m_el[2].x();
417  temp.f[2]=m_el[1].x() - m_el[0].y();
418  temp.f[3]=x;
419  //temp.f[3]= s * btScalar(0.5);
420  }
421  else
422  {
423  int i, j, k;
424  if(m_el[0].x() < m_el[1].y())
425  {
426  if( m_el[1].y() < m_el[2].z() )
427  { i = 2; j = 0; k = 1; }
428  else
429  { i = 1; j = 2; k = 0; }
430  }
431  else
432  {
433  if( m_el[0].x() < m_el[2].z())
434  { i = 2; j = 0; k = 1; }
435  else
436  { i = 0; j = 1; k = 2; }
437  }
438 
439  x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);
440 
441  temp.f[3] = (m_el[k][j] - m_el[j][k]);
442  temp.f[j] = (m_el[j][i] + m_el[i][j]);
443  temp.f[k] = (m_el[k][i] + m_el[i][k]);
444  temp.f[i] = x;
445  //temp.f[i] = s * btScalar(0.5);
446  }
447 
448  s = btSqrt(x);
449  q.set128(temp.vec);
450  s = btScalar(0.5) / s;
451 
452  q *= s;
453 #else
454  btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
455 
456  btScalar temp[4];
457 
458  if (trace > btScalar(0.0))
459  {
460  btScalar s = btSqrt(trace + btScalar(1.0));
461  temp[3]=(s * btScalar(0.5));
462  s = btScalar(0.5) / s;
463 
464  temp[0]=((m_el[2].y() - m_el[1].z()) * s);
465  temp[1]=((m_el[0].z() - m_el[2].x()) * s);
466  temp[2]=((m_el[1].x() - m_el[0].y()) * s);
467  }
468  else
469  {
470  int i = m_el[0].x() < m_el[1].y() ?
471  (m_el[1].y() < m_el[2].z() ? 2 : 1) :
472  (m_el[0].x() < m_el[2].z() ? 2 : 0);
473  int j = (i + 1) % 3;
474  int k = (i + 2) % 3;
475 
476  btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
477  temp[i] = s * btScalar(0.5);
478  s = btScalar(0.5) / s;
479 
480  temp[3] = (m_el[k][j] - m_el[j][k]) * s;
481  temp[j] = (m_el[j][i] + m_el[i][j]) * s;
482  temp[k] = (m_el[k][i] + m_el[i][k]) * s;
483  }
484  q.setValue(temp[0],temp[1],temp[2],temp[3]);
485 #endif
486  }
487 
492  void getEulerYPR(btScalar& yaw, btScalar& pitch, btScalar& roll) const
493  {
494 
495  // first use the normal calculus
496  yaw = btScalar(btAtan2(m_el[1].x(), m_el[0].x()));
497  pitch = btScalar(btAsin(-m_el[2].x()));
498  roll = btScalar(btAtan2(m_el[2].y(), m_el[2].z()));
499 
500  // on pitch = +/-HalfPI
501  if (btFabs(pitch)==SIMD_HALF_PI)
502  {
503  if (yaw>0)
504  yaw-=SIMD_PI;
505  else
506  yaw+=SIMD_PI;
507 
508  if (roll>0)
509  roll-=SIMD_PI;
510  else
511  roll+=SIMD_PI;
512  }
513  };
514 
515 
521  void getEulerZYX(btScalar& yaw, btScalar& pitch, btScalar& roll, unsigned int solution_number = 1) const
522  {
523  struct Euler
524  {
525  btScalar yaw;
526  btScalar pitch;
527  btScalar roll;
528  };
529 
530  Euler euler_out;
531  Euler euler_out2; //second solution
532  //get the pointer to the raw data
533 
534  // Check that pitch is not at a singularity
535  if (btFabs(m_el[2].x()) >= 1)
536  {
537  euler_out.yaw = 0;
538  euler_out2.yaw = 0;
539 
540  // From difference of angles formula
541  btScalar delta = btAtan2(m_el[0].x(),m_el[0].z());
542  if (m_el[2].x() > 0) //gimbal locked up
543  {
544  euler_out.pitch = SIMD_PI / btScalar(2.0);
545  euler_out2.pitch = SIMD_PI / btScalar(2.0);
546  euler_out.roll = euler_out.pitch + delta;
547  euler_out2.roll = euler_out.pitch + delta;
548  }
549  else // gimbal locked down
550  {
551  euler_out.pitch = -SIMD_PI / btScalar(2.0);
552  euler_out2.pitch = -SIMD_PI / btScalar(2.0);
553  euler_out.roll = -euler_out.pitch + delta;
554  euler_out2.roll = -euler_out.pitch + delta;
555  }
556  }
557  else
558  {
559  euler_out.pitch = - btAsin(m_el[2].x());
560  euler_out2.pitch = SIMD_PI - euler_out.pitch;
561 
562  euler_out.roll = btAtan2(m_el[2].y()/btCos(euler_out.pitch),
563  m_el[2].z()/btCos(euler_out.pitch));
564  euler_out2.roll = btAtan2(m_el[2].y()/btCos(euler_out2.pitch),
565  m_el[2].z()/btCos(euler_out2.pitch));
566 
567  euler_out.yaw = btAtan2(m_el[1].x()/btCos(euler_out.pitch),
568  m_el[0].x()/btCos(euler_out.pitch));
569  euler_out2.yaw = btAtan2(m_el[1].x()/btCos(euler_out2.pitch),
570  m_el[0].x()/btCos(euler_out2.pitch));
571  }
572 
573  if (solution_number == 1)
574  {
575  yaw = euler_out.yaw;
576  pitch = euler_out.pitch;
577  roll = euler_out.roll;
578  }
579  else
580  {
581  yaw = euler_out2.yaw;
582  pitch = euler_out2.pitch;
583  roll = euler_out2.roll;
584  }
585  }
586 
590  btMatrix3x3 scaled(const btVector3& s) const
591  {
592 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
593  return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
594 #else
595  return btMatrix3x3(
596  m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
597  m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
598  m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
599 #endif
600  }
601 
603  btScalar determinant() const;
605  btMatrix3x3 adjoint() const;
607  btMatrix3x3 absolute() const;
609  btMatrix3x3 transpose() const;
611  btMatrix3x3 inverse() const;
612 
616  btVector3 solve33(const btVector3& b) const
617  {
618  btVector3 col1 = getColumn(0);
619  btVector3 col2 = getColumn(1);
620  btVector3 col3 = getColumn(2);
621 
622  btScalar det = btDot(col1, btCross(col2, col3));
623  if (btFabs(det)>SIMD_EPSILON)
624  {
625  det = 1.0f / det;
626  }
627  btVector3 x;
628  x[0] = det * btDot(b, btCross(col2, col3));
629  x[1] = det * btDot(col1, btCross(b, col3));
630  x[2] = det * btDot(col1, btCross(col2, b));
631  return x;
632  }
633 
634  btMatrix3x3 transposeTimes(const btMatrix3x3& m) const;
635  btMatrix3x3 timesTranspose(const btMatrix3x3& m) const;
636 
638  {
639  return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();
640  }
642  {
643  return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();
644  }
646  {
647  return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
648  }
649 
652  SIMD_FORCE_INLINE void extractRotation(btQuaternion &q,btScalar tolerance = 1.0e-9, int maxIter=100)
653  {
654  int iter =0;
655  btScalar w;
656  const btMatrix3x3& A=*this;
657  for(iter = 0; iter < maxIter; iter++)
658  {
659  btMatrix3x3 R(q);
660  btVector3 omega = (R.getColumn(0).cross(A.getColumn(0)) + R.getColumn(1).cross(A.getColumn(1))
661  + R.getColumn(2).cross(A.getColumn(2))
662  ) * (btScalar(1.0) / btFabs(R.getColumn(0).dot(A.getColumn(0)) + R.getColumn
663  (1).dot(A.getColumn(1)) + R.getColumn(2).dot(A.getColumn(2))) +
664  tolerance);
665  w = omega.norm();
666  if(w < tolerance)
667  break;
668  q = btQuaternion(btVector3((btScalar(1.0) / w) * omega),w) *
669  q;
670  q.normalize();
671  }
672  }
673 
674 
675 
682  void diagonalize(btMatrix3x3& rot, btScalar tolerance = 1.0e-9, int maxIter=100)
683  {
684  btQuaternion r;
686  extractRotation(r,tolerance,maxIter);
687  rot.setRotation(r);
688  btMatrix3x3 rotInv = btMatrix3x3(r.inverse());
689  btMatrix3x3 old = *this;
690  setValue(old.tdotx( rotInv[0]), old.tdoty( rotInv[0]), old.tdotz( rotInv[0]),
691  old.tdotx( rotInv[1]), old.tdoty( rotInv[1]), old.tdotz( rotInv[1]),
692  old.tdotx( rotInv[2]), old.tdoty( rotInv[2]), old.tdotz( rotInv[2]));
693  }
694 
695 
696 
697 
705  btScalar cofac(int r1, int c1, int r2, int c2) const
706  {
707  return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
708  }
709 
710  void serialize(struct btMatrix3x3Data& dataOut) const;
711 
712  void serializeFloat(struct btMatrix3x3FloatData& dataOut) const;
713 
714  void deSerialize(const struct btMatrix3x3Data& dataIn);
715 
716  void deSerializeFloat(const struct btMatrix3x3FloatData& dataIn);
717 
718  void deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn);
719 
720 };
721 
722 
725 {
726 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
727  __m128 rv00, rv01, rv02;
728  __m128 rv10, rv11, rv12;
729  __m128 rv20, rv21, rv22;
730  __m128 mv0, mv1, mv2;
731 
732  rv02 = m_el[0].mVec128;
733  rv12 = m_el[1].mVec128;
734  rv22 = m_el[2].mVec128;
735 
736  mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
737  mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
738  mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);
739 
740  // rv0
741  rv00 = bt_splat_ps(rv02, 0);
742  rv01 = bt_splat_ps(rv02, 1);
743  rv02 = bt_splat_ps(rv02, 2);
744 
745  rv00 = _mm_mul_ps(rv00, mv0);
746  rv01 = _mm_mul_ps(rv01, mv1);
747  rv02 = _mm_mul_ps(rv02, mv2);
748 
749  // rv1
750  rv10 = bt_splat_ps(rv12, 0);
751  rv11 = bt_splat_ps(rv12, 1);
752  rv12 = bt_splat_ps(rv12, 2);
753 
754  rv10 = _mm_mul_ps(rv10, mv0);
755  rv11 = _mm_mul_ps(rv11, mv1);
756  rv12 = _mm_mul_ps(rv12, mv2);
757 
758  // rv2
759  rv20 = bt_splat_ps(rv22, 0);
760  rv21 = bt_splat_ps(rv22, 1);
761  rv22 = bt_splat_ps(rv22, 2);
762 
763  rv20 = _mm_mul_ps(rv20, mv0);
764  rv21 = _mm_mul_ps(rv21, mv1);
765  rv22 = _mm_mul_ps(rv22, mv2);
766 
767  rv00 = _mm_add_ps(rv00, rv01);
768  rv10 = _mm_add_ps(rv10, rv11);
769  rv20 = _mm_add_ps(rv20, rv21);
770 
771  m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
772  m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
773  m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
774 
775 #elif defined(BT_USE_NEON)
776 
777  float32x4_t rv0, rv1, rv2;
778  float32x4_t v0, v1, v2;
779  float32x4_t mv0, mv1, mv2;
780 
781  v0 = m_el[0].mVec128;
782  v1 = m_el[1].mVec128;
783  v2 = m_el[2].mVec128;
784 
785  mv0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
786  mv1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
787  mv2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
788 
789  rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
790  rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
791  rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
792 
793  rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
794  rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
795  rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
796 
797  rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
798  rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
799  rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
800 
801  m_el[0].mVec128 = rv0;
802  m_el[1].mVec128 = rv1;
803  m_el[2].mVec128 = rv2;
804 #else
805  setValue(
806  m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
807  m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
808  m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
809 #endif
810  return *this;
811 }
812 
815 {
816 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
817  m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128;
818  m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128;
819  m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128;
820 #else
821  setValue(
822  m_el[0][0]+m.m_el[0][0],
823  m_el[0][1]+m.m_el[0][1],
824  m_el[0][2]+m.m_el[0][2],
825  m_el[1][0]+m.m_el[1][0],
826  m_el[1][1]+m.m_el[1][1],
827  m_el[1][2]+m.m_el[1][2],
828  m_el[2][0]+m.m_el[2][0],
829  m_el[2][1]+m.m_el[2][1],
830  m_el[2][2]+m.m_el[2][2]);
831 #endif
832  return *this;
833 }
834 
836 operator*(const btMatrix3x3& m, const btScalar & k)
837 {
838 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
839  __m128 vk = bt_splat_ps(_mm_load_ss((float *)&k), 0x80);
840  return btMatrix3x3(
841  _mm_mul_ps(m[0].mVec128, vk),
842  _mm_mul_ps(m[1].mVec128, vk),
843  _mm_mul_ps(m[2].mVec128, vk));
844 #elif defined(BT_USE_NEON)
845  return btMatrix3x3(
846  vmulq_n_f32(m[0].mVec128, k),
847  vmulq_n_f32(m[1].mVec128, k),
848  vmulq_n_f32(m[2].mVec128, k));
849 #else
850  return btMatrix3x3(
851  m[0].x()*k,m[0].y()*k,m[0].z()*k,
852  m[1].x()*k,m[1].y()*k,m[1].z()*k,
853  m[2].x()*k,m[2].y()*k,m[2].z()*k);
854 #endif
855 }
856 
858 operator+(const btMatrix3x3& m1, const btMatrix3x3& m2)
859 {
860 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
861  return btMatrix3x3(
862  m1[0].mVec128 + m2[0].mVec128,
863  m1[1].mVec128 + m2[1].mVec128,
864  m1[2].mVec128 + m2[2].mVec128);
865 #else
866  return btMatrix3x3(
867  m1[0][0]+m2[0][0],
868  m1[0][1]+m2[0][1],
869  m1[0][2]+m2[0][2],
870 
871  m1[1][0]+m2[1][0],
872  m1[1][1]+m2[1][1],
873  m1[1][2]+m2[1][2],
874 
875  m1[2][0]+m2[2][0],
876  m1[2][1]+m2[2][1],
877  m1[2][2]+m2[2][2]);
878 #endif
879 }
880 
882 operator-(const btMatrix3x3& m1, const btMatrix3x3& m2)
883 {
884 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
885  return btMatrix3x3(
886  m1[0].mVec128 - m2[0].mVec128,
887  m1[1].mVec128 - m2[1].mVec128,
888  m1[2].mVec128 - m2[2].mVec128);
889 #else
890  return btMatrix3x3(
891  m1[0][0]-m2[0][0],
892  m1[0][1]-m2[0][1],
893  m1[0][2]-m2[0][2],
894 
895  m1[1][0]-m2[1][0],
896  m1[1][1]-m2[1][1],
897  m1[1][2]-m2[1][2],
898 
899  m1[2][0]-m2[2][0],
900  m1[2][1]-m2[2][1],
901  m1[2][2]-m2[2][2]);
902 #endif
903 }
904 
905 
908 {
909 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
910  m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128;
911  m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128;
912  m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128;
913 #else
914  setValue(
915  m_el[0][0]-m.m_el[0][0],
916  m_el[0][1]-m.m_el[0][1],
917  m_el[0][2]-m.m_el[0][2],
918  m_el[1][0]-m.m_el[1][0],
919  m_el[1][1]-m.m_el[1][1],
920  m_el[1][2]-m.m_el[1][2],
921  m_el[2][0]-m.m_el[2][0],
922  m_el[2][1]-m.m_el[2][1],
923  m_el[2][2]-m.m_el[2][2]);
924 #endif
925  return *this;
926 }
927 
928 
931 {
932  return btTriple((*this)[0], (*this)[1], (*this)[2]);
933 }
934 
935 
938 {
939 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
940  return btMatrix3x3(
941  _mm_and_ps(m_el[0].mVec128, btvAbsfMask),
942  _mm_and_ps(m_el[1].mVec128, btvAbsfMask),
943  _mm_and_ps(m_el[2].mVec128, btvAbsfMask));
944 #elif defined(BT_USE_NEON)
945  return btMatrix3x3(
946  (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
947  (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
948  (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
949 #else
950  return btMatrix3x3(
951  btFabs(m_el[0].x()), btFabs(m_el[0].y()), btFabs(m_el[0].z()),
952  btFabs(m_el[1].x()), btFabs(m_el[1].y()), btFabs(m_el[1].z()),
953  btFabs(m_el[2].x()), btFabs(m_el[2].y()), btFabs(m_el[2].z()));
954 #endif
955 }
956 
959 {
960 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
961  __m128 v0 = m_el[0].mVec128;
962  __m128 v1 = m_el[1].mVec128;
963  __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
964  __m128 vT;
965 
966  v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
967 
968  vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
969  v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
970 
971  v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) ); // y0 y1 y2 0
972  v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) ); // x0 x1 x2 0
973  v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
974 
975 
976  return btMatrix3x3( v0, v1, v2 );
977 #elif defined(BT_USE_NEON)
978  // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
979  static const uint32x2_t zMask = (const uint32x2_t) {static_cast<uint32_t>(-1), 0 };
980  float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
981  float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
982  float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
983  float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
984  float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
985  float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q ); // z0 z1 z2 0
986  return btMatrix3x3( v0, v1, v2 );
987 #else
988  return btMatrix3x3( m_el[0].x(), m_el[1].x(), m_el[2].x(),
989  m_el[0].y(), m_el[1].y(), m_el[2].y(),
990  m_el[0].z(), m_el[1].z(), m_el[2].z());
991 #endif
992 }
993 
996 {
997  return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
998  cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
999  cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
1000 }
1001 
1004 {
1005  btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
1006  btScalar det = (*this)[0].dot(co);
1007  //btFullAssert(det != btScalar(0.0));
1008  btAssert(det != btScalar(0.0));
1009  btScalar s = btScalar(1.0) / det;
1010  return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
1011  co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
1012  co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
1013 }
1014 
1017 {
1018 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1019  // zeros w
1020 // static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL };
1021  __m128 row = m_el[0].mVec128;
1022  __m128 m0 = _mm_and_ps( m.getRow(0).mVec128, btvFFF0fMask );
1023  __m128 m1 = _mm_and_ps( m.getRow(1).mVec128, btvFFF0fMask);
1024  __m128 m2 = _mm_and_ps( m.getRow(2).mVec128, btvFFF0fMask );
1025  __m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
1026  __m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
1027  __m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
1028  row = m_el[1].mVec128;
1029  r0 = _mm_add_ps( r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
1030  r1 = _mm_add_ps( r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
1031  r2 = _mm_add_ps( r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
1032  row = m_el[2].mVec128;
1033  r0 = _mm_add_ps( r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
1034  r1 = _mm_add_ps( r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
1035  r2 = _mm_add_ps( r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
1036  return btMatrix3x3( r0, r1, r2 );
1037 
1038 #elif defined BT_USE_NEON
1039  // zeros w
1040  static const uint32x4_t xyzMask = (const uint32x4_t){ static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0 };
1041  float32x4_t m0 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(0).mVec128, xyzMask );
1042  float32x4_t m1 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(1).mVec128, xyzMask );
1043  float32x4_t m2 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(2).mVec128, xyzMask );
1044  float32x4_t row = m_el[0].mVec128;
1045  float32x4_t r0 = vmulq_lane_f32( m0, vget_low_f32(row), 0);
1046  float32x4_t r1 = vmulq_lane_f32( m0, vget_low_f32(row), 1);
1047  float32x4_t r2 = vmulq_lane_f32( m0, vget_high_f32(row), 0);
1048  row = m_el[1].mVec128;
1049  r0 = vmlaq_lane_f32( r0, m1, vget_low_f32(row), 0);
1050  r1 = vmlaq_lane_f32( r1, m1, vget_low_f32(row), 1);
1051  r2 = vmlaq_lane_f32( r2, m1, vget_high_f32(row), 0);
1052  row = m_el[2].mVec128;
1053  r0 = vmlaq_lane_f32( r0, m2, vget_low_f32(row), 0);
1054  r1 = vmlaq_lane_f32( r1, m2, vget_low_f32(row), 1);
1055  r2 = vmlaq_lane_f32( r2, m2, vget_high_f32(row), 0);
1056  return btMatrix3x3( r0, r1, r2 );
1057 #else
1058  return btMatrix3x3(
1059  m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
1060  m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
1061  m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
1062  m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
1063  m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
1064  m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
1065  m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
1066  m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
1067  m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
1068 #endif
1069 }
1070 
1073 {
1074 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1075  __m128 a0 = m_el[0].mVec128;
1076  __m128 a1 = m_el[1].mVec128;
1077  __m128 a2 = m_el[2].mVec128;
1078 
1079  btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1080  __m128 mx = mT[0].mVec128;
1081  __m128 my = mT[1].mVec128;
1082  __m128 mz = mT[2].mVec128;
1083 
1084  __m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
1085  __m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
1086  __m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
1087  r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
1088  r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
1089  r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
1090  r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
1091  r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
1092  r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
1093  return btMatrix3x3( r0, r1, r2);
1094 
1095 #elif defined BT_USE_NEON
1096  float32x4_t a0 = m_el[0].mVec128;
1097  float32x4_t a1 = m_el[1].mVec128;
1098  float32x4_t a2 = m_el[2].mVec128;
1099 
1100  btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1101  float32x4_t mx = mT[0].mVec128;
1102  float32x4_t my = mT[1].mVec128;
1103  float32x4_t mz = mT[2].mVec128;
1104 
1105  float32x4_t r0 = vmulq_lane_f32( mx, vget_low_f32(a0), 0);
1106  float32x4_t r1 = vmulq_lane_f32( mx, vget_low_f32(a1), 0);
1107  float32x4_t r2 = vmulq_lane_f32( mx, vget_low_f32(a2), 0);
1108  r0 = vmlaq_lane_f32( r0, my, vget_low_f32(a0), 1);
1109  r1 = vmlaq_lane_f32( r1, my, vget_low_f32(a1), 1);
1110  r2 = vmlaq_lane_f32( r2, my, vget_low_f32(a2), 1);
1111  r0 = vmlaq_lane_f32( r0, mz, vget_high_f32(a0), 0);
1112  r1 = vmlaq_lane_f32( r1, mz, vget_high_f32(a1), 0);
1113  r2 = vmlaq_lane_f32( r2, mz, vget_high_f32(a2), 0);
1114  return btMatrix3x3( r0, r1, r2 );
1115 
1116 #else
1117  return btMatrix3x3(
1118  m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
1119  m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
1120  m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
1121 #endif
1122 }
1123 
1125 operator*(const btMatrix3x3& m, const btVector3& v)
1126 {
1127 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
1128  return v.dot3(m[0], m[1], m[2]);
1129 #else
1130  return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
1131 #endif
1132 }
1133 
1134 
1136 operator*(const btVector3& v, const btMatrix3x3& m)
1137 {
1138 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1139 
1140  const __m128 vv = v.mVec128;
1141 
1142  __m128 c0 = bt_splat_ps( vv, 0);
1143  __m128 c1 = bt_splat_ps( vv, 1);
1144  __m128 c2 = bt_splat_ps( vv, 2);
1145 
1146  c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask) );
1147  c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask) );
1148  c0 = _mm_add_ps(c0, c1);
1149  c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask) );
1150 
1151  return btVector3(_mm_add_ps(c0, c2));
1152 #elif defined(BT_USE_NEON)
1153  const float32x4_t vv = v.mVec128;
1154  const float32x2_t vlo = vget_low_f32(vv);
1155  const float32x2_t vhi = vget_high_f32(vv);
1156 
1157  float32x4_t c0, c1, c2;
1158 
1159  c0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
1160  c1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
1161  c2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
1162 
1163  c0 = vmulq_lane_f32(c0, vlo, 0);
1164  c1 = vmulq_lane_f32(c1, vlo, 1);
1165  c2 = vmulq_lane_f32(c2, vhi, 0);
1166  c0 = vaddq_f32(c0, c1);
1167  c0 = vaddq_f32(c0, c2);
1168 
1169  return btVector3(c0);
1170 #else
1171  return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
1172 #endif
1173 }
1174 
1176 operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
1177 {
1178 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1179 
1180  __m128 m10 = m1[0].mVec128;
1181  __m128 m11 = m1[1].mVec128;
1182  __m128 m12 = m1[2].mVec128;
1183 
1184  __m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);
1185 
1186  __m128 c0 = bt_splat_ps( m10, 0);
1187  __m128 c1 = bt_splat_ps( m11, 0);
1188  __m128 c2 = bt_splat_ps( m12, 0);
1189 
1190  c0 = _mm_mul_ps(c0, m2v);
1191  c1 = _mm_mul_ps(c1, m2v);
1192  c2 = _mm_mul_ps(c2, m2v);
1193 
1194  m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);
1195 
1196  __m128 c0_1 = bt_splat_ps( m10, 1);
1197  __m128 c1_1 = bt_splat_ps( m11, 1);
1198  __m128 c2_1 = bt_splat_ps( m12, 1);
1199 
1200  c0_1 = _mm_mul_ps(c0_1, m2v);
1201  c1_1 = _mm_mul_ps(c1_1, m2v);
1202  c2_1 = _mm_mul_ps(c2_1, m2v);
1203 
1204  m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);
1205 
1206  c0 = _mm_add_ps(c0, c0_1);
1207  c1 = _mm_add_ps(c1, c1_1);
1208  c2 = _mm_add_ps(c2, c2_1);
1209 
1210  m10 = bt_splat_ps( m10, 2);
1211  m11 = bt_splat_ps( m11, 2);
1212  m12 = bt_splat_ps( m12, 2);
1213 
1214  m10 = _mm_mul_ps(m10, m2v);
1215  m11 = _mm_mul_ps(m11, m2v);
1216  m12 = _mm_mul_ps(m12, m2v);
1217 
1218  c0 = _mm_add_ps(c0, m10);
1219  c1 = _mm_add_ps(c1, m11);
1220  c2 = _mm_add_ps(c2, m12);
1221 
1222  return btMatrix3x3(c0, c1, c2);
1223 
1224 #elif defined(BT_USE_NEON)
1225 
1226  float32x4_t rv0, rv1, rv2;
1227  float32x4_t v0, v1, v2;
1228  float32x4_t mv0, mv1, mv2;
1229 
1230  v0 = m1[0].mVec128;
1231  v1 = m1[1].mVec128;
1232  v2 = m1[2].mVec128;
1233 
1234  mv0 = (float32x4_t) vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
1235  mv1 = (float32x4_t) vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
1236  mv2 = (float32x4_t) vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);
1237 
1238  rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
1239  rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
1240  rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
1241 
1242  rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
1243  rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
1244  rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
1245 
1246  rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
1247  rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
1248  rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
1249 
1250  return btMatrix3x3(rv0, rv1, rv2);
1251 
1252 #else
1253  return btMatrix3x3(
1254  m2.tdotx( m1[0]), m2.tdoty( m1[0]), m2.tdotz( m1[0]),
1255  m2.tdotx( m1[1]), m2.tdoty( m1[1]), m2.tdotz( m1[1]),
1256  m2.tdotx( m1[2]), m2.tdoty( m1[2]), m2.tdotz( m1[2]));
1257 #endif
1258 }
1259 
1260 /*
1261 SIMD_FORCE_INLINE btMatrix3x3 btMultTransposeLeft(const btMatrix3x3& m1, const btMatrix3x3& m2) {
1262 return btMatrix3x3(
1263 m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0],
1264 m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1],
1265 m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2],
1266 m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0],
1267 m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1],
1268 m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2],
1269 m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0],
1270 m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1],
1271 m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]);
1272 }
1273 */
1274 
1278 {
1279 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1280 
1281  __m128 c0, c1, c2;
1282 
1283  c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
1284  c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
1285  c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);
1286 
1287  c0 = _mm_and_ps(c0, c1);
1288  c0 = _mm_and_ps(c0, c2);
1289 
1290  int m = _mm_movemask_ps((__m128)c0);
1291  return (0x7 == (m & 0x7));
1292 
1293 #else
1294  return
1295  ( m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
1296  m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
1297  m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2] );
1298 #endif
1299 }
1300 
1303 {
1305 };
1306 
1309 {
1311 };
1312 
1313 
1314 
1315 
1317 {
1318  for (int i=0;i<3;i++)
1319  m_el[i].serialize(dataOut.m_el[i]);
1320 }
1321 
1323 {
1324  for (int i=0;i<3;i++)
1325  m_el[i].serializeFloat(dataOut.m_el[i]);
1326 }
1327 
1328 
1330 {
1331  for (int i=0;i<3;i++)
1332  m_el[i].deSerialize(dataIn.m_el[i]);
1333 }
1334 
1336 {
1337  for (int i=0;i<3;i++)
1338  m_el[i].deSerializeFloat(dataIn.m_el[i]);
1339 }
1340 
1342 {
1343  for (int i=0;i<3;i++)
1344  m_el[i].deSerializeDouble(dataIn.m_el[i]);
1345 }
1346 
1347 #endif //BT_MATRIX3x3_H
1348 
const btScalar & x() const
Return the x value.
Definition: btQuadWord.h:116
void deSerializeFloat(const struct btMatrix3x3FloatData &dataIn)
Definition: btMatrix3x3.h:1335
#define SIMD_EPSILON
Definition: btScalar.h:521
for serialization
Definition: btMatrix3x3.h:1302
void serialize(struct btMatrix3x3Data &dataOut) const
Definition: btMatrix3x3.h:1316
btVector3DoubleData m_el[3]
Definition: btMatrix3x3.h:1310
bool operator==(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Equality operator between two matrices It will test all elements are equal.
Definition: btMatrix3x3.h:1277
btScalar tdoty(const btVector3 &v) const
Definition: btMatrix3x3.h:641
btScalar tdotx(const btVector3 &v) const
Definition: btMatrix3x3.h:637
btMatrix3x3 timesTranspose(const btMatrix3x3 &m) const
Definition: btMatrix3x3.h:1072
btScalar norm() const
Return the norm (length) of the vector.
Definition: btVector3.h:269
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Definition: btVector3.h:652
btVector3 solve33(const btVector3 &b) const
Solve A * x = b, where b is a column vector.
Definition: btMatrix3x3.h:616
void setRotation(const btQuaternion &q)
Set the matrix from a quaternion.
Definition: btMatrix3x3.h:209
void serializeFloat(struct btMatrix3x3FloatData &dataOut) const
Definition: btMatrix3x3.h:1322
btScalar tdotz(const btVector3 &v) const
Definition: btMatrix3x3.h:645
btVector3 dot3(const btVector3 &v0, const btVector3 &v1, const btVector3 &v2) const
Definition: btVector3.h:731
btScalar btSin(btScalar x)
Definition: btScalar.h:477
btMatrix3x3 & operator *=(const btMatrix3x3 &m)
Multiply by the target matrix on the right.
Definition: btMatrix3x3.h:724
static const btQuaternion & getIdentity()
Definition: btQuaternion.h:595
const btVector3 & getRow(int i) const
Get a row of the matrix as a vector.
Definition: btMatrix3x3.h:142
btScalar btSqrt(btScalar y)
Definition: btScalar.h:444
#define btAssert(x)
Definition: btScalar.h:131
#define SIMD_FORCE_INLINE
Definition: btScalar.h:81
btScalar cofac(int r1, int c1, int r2, int c2) const
Calculate the matrix cofactor.
Definition: btMatrix3x3.h:705
void diagonalize(btMatrix3x3 &rot, btScalar tolerance=1.0e-9, int maxIter=100)
diagonalizes this matrix
Definition: btMatrix3x3.h:682
btMatrix3x3 operator+(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Definition: btMatrix3x3.h:858
btMatrix3x3 & operator=(const btMatrix3x3 &other)
Assignment Operator.
Definition: btMatrix3x3.h:122
btQuaternion inverse(const btQuaternion &q)
Return the inverse of a quaternion.
Definition: btQuaternion.h:900
#define btFullAssert(x)
Definition: btScalar.h:134
#define SIMD_HALF_PI
Definition: btScalar.h:506
btVector3 m_el[3]
Data storage for the matrix, each vector is a row of the matrix.
Definition: btMatrix3x3.h:51
btMatrix3x3 transpose() const
Return the transpose of the matrix.
Definition: btMatrix3x3.h:958
btMatrix3x3(const btQuaternion &q)
Constructor from Quaternion.
Definition: btMatrix3x3.h:60
btVector3 btCross(const btVector3 &v1, const btVector3 &v2)
Return the cross product of two vectors.
Definition: btVector3.h:931
btQuaternion inverse() const
Return the inverse of this quaternion.
Definition: btQuaternion.h:482
btMatrix3x3 operator *(const btMatrix3x3 &m, const btScalar &k)
Definition: btMatrix3x3.h:836
btVector3 getColumn(int i) const
Get a column of the matrix as a vector.
Definition: btMatrix3x3.h:134
void deSerialize(const struct btMatrix3x3Data &dataIn)
Definition: btMatrix3x3.h:1329
#define SIMD_PI
Definition: btScalar.h:504
btMatrix3x3 transposeTimes(const btMatrix3x3 &m) const
Definition: btMatrix3x3.h:1016
const btScalar & x() const
Return the x value.
Definition: btVector3.h:587
#define btMatrix3x3Data
Definition: btMatrix3x3.h:42
btQuaternion & normalize()
Normalize the quaternion Such that x^2 + y^2 + z^2 +w^2 = 1.
Definition: btQuaternion.h:369
btMatrix3x3 adjoint() const
Return the adjoint of the matrix.
Definition: btMatrix3x3.h:995
void deSerializeDouble(const struct btMatrix3x3DoubleData &dataIn)
Definition: btMatrix3x3.h:1341
btVector3 cross(const btVector3 &v) const
Return the cross product between this and another vector.
Definition: btVector3.h:389
btScalar dot(const btVector3 &v) const
Return the dot product.
Definition: btVector3.h:235
btScalar btAtan2(btScalar x, btScalar y)
Definition: btScalar.h:496
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Set x,y,z and zero w.
Definition: btQuadWord.h:152
btMatrix3x3 scaled(const btVector3 &s) const
Create a scaled copy of the matrix.
Definition: btMatrix3x3.h:590
const btScalar & y() const
Return the y value.
Definition: btVector3.h:589
const btScalar & z() const
Return the z value.
Definition: btVector3.h:591
void getEulerZYX(btScalar &yaw, btScalar &pitch, btScalar &roll, unsigned int solution_number=1) const
Get the matrix represented as euler angles around ZYX.
Definition: btMatrix3x3.h:521
void setValue(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Set the values of the matrix explicitly (row major)
Definition: btMatrix3x3.h:198
btMatrix3x3(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Constructor with row major formatting.
Definition: btMatrix3x3.h:69
const btScalar & z() const
Return the z value.
Definition: btQuadWord.h:120
void getEulerYPR(btScalar &yaw, btScalar &pitch, btScalar &roll) const
Get the matrix represented as euler angles around YXZ, roundtrip with setEulerYPR.
Definition: btMatrix3x3.h:492
btVector3 can be used to represent 3D points and vectors.
Definition: btVector3.h:83
#define ATTRIBUTE_ALIGNED16(a)
Definition: btScalar.h:82
btMatrix3x3 & operator-=(const btMatrix3x3 &m)
Substractss by the target matrix on the right.
Definition: btMatrix3x3.h:907
void setEulerYPR(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Set the matrix from euler angles using YPR around YXZ respectively.
Definition: btMatrix3x3.h:284
btMatrix3x3 & operator+=(const btMatrix3x3 &m)
Adds by the target matrix on the right.
Definition: btMatrix3x3.h:814
btMatrix3x3 operator-(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Definition: btMatrix3x3.h:882
btScalar determinant() const
Return the determinant of the matrix.
Definition: btMatrix3x3.h:930
btMatrix3x3 absolute() const
Return the matrix with all values non negative.
Definition: btMatrix3x3.h:937
btMatrix3x3()
No initializaion constructor.
Definition: btMatrix3x3.h:55
void getOpenGLSubMatrix(btScalar *m) const
Fill the rotational part of an OpenGL matrix and clear the shear/perspective.
Definition: btMatrix3x3.h:347
const btVector3 & operator[](int i) const
Get a const reference to a row of the matrix as a vector.
Definition: btMatrix3x3.h:158
btScalar length2() const
Return the length squared of the quaternion.
Definition: btQuaternion.h:348
btVector3 & operator[](int i)
Get a mutable reference to a row of the matrix as a vector.
Definition: btMatrix3x3.h:150
The btMatrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with...
Definition: btMatrix3x3.h:48
const btScalar & y() const
Return the y value.
Definition: btQuadWord.h:118
btScalar dot(const btQuaternion &q1, const btQuaternion &q2)
Calculate the dot product between two quaternions.
Definition: btQuaternion.h:878
btMatrix3x3(const btMatrix3x3 &other)
Copy constructor.
Definition: btMatrix3x3.h:114
for serialization
Definition: btMatrix3x3.h:1308
The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatr...
Definition: btQuaternion.h:55
void setFromOpenGLSubMatrix(const btScalar *m)
Set from the rotational part of a 4x4 OpenGL matrix.
Definition: btMatrix3x3.h:181
btScalar btAsin(btScalar x)
Definition: btScalar.h:487
btScalar btDot(const btVector3 &v1, const btVector3 &v2)
Return the dot product between two vectors.
Definition: btVector3.h:901
btMatrix3x3 inverse() const
Return the inverse of the matrix.
Definition: btMatrix3x3.h:1003
const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:122
btScalar btTriple(const btVector3 &v1, const btVector3 &v2, const btVector3 &v3)
Definition: btVector3.h:937
void getRotation(btQuaternion &q) const
Get the matrix represented as a quaternion.
Definition: btMatrix3x3.h:400
void setIdentity()
Set the matrix to the identity.
Definition: btMatrix3x3.h:317
static const btMatrix3x3 & getIdentity()
Definition: btMatrix3x3.h:330
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floati...
Definition: btScalar.h:292
void extractRotation(btQuaternion &q, btScalar tolerance=1.0e-9, int maxIter=100)
extractRotation is from "A robust method to extract the rotational part of deformations" See http://d...
Definition: btMatrix3x3.h:652
btScalar btCos(btScalar x)
Definition: btScalar.h:476
btVector3FloatData m_el[3]
Definition: btMatrix3x3.h:1304
btScalar btFabs(btScalar x)
Definition: btScalar.h:475
void setEulerZYX(btScalar eulerX, btScalar eulerY, btScalar eulerZ)
Set the matrix from euler angles YPR around ZYX axes.
Definition: btMatrix3x3.h:298