#ifndef BT_MATRIX3x3_H
#define BT_MATRIX3x3_H

#ifdef BT_USE_SSE
#define vMPPP (_mm_set_ps(+0.0f, +0.0f, +0.0f, -0.0f))
#endif

#if defined(BT_USE_SSE)
#define v1000 (_mm_set_ps(0.0f, 0.0f, 0.0f, 1.0f))
#define v0100 (_mm_set_ps(0.0f, 0.0f, 1.0f, 0.0f))
#define v0010 (_mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f))
#elif defined(BT_USE_NEON)
const btSimdFloat4 ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
#endif

#ifdef BT_USE_DOUBLE_PRECISION
#define btMatrix3x3Data btMatrix3x3DoubleData
#else
#define btMatrix3x3Data btMatrix3x3FloatData
#endif  //BT_USE_DOUBLE_PRECISION

#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	// SIMD copy constructor: copy the three 128-bit rows directly
	SIMD_FORCE_INLINE btMatrix3x3(const btMatrix3x3& rhs)
	{
		m_el[0].mVec128 = rhs.m_el[0].mVec128;
		m_el[1].mVec128 = rhs.m_el[1].mVec128;
		m_el[2].mVec128 = rhs.m_el[2].mVec128;
	}

	// SIMD assignment operator
	SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& m)
	{
		m_el[0].mVec128 = m.m_el[0].mVec128;
		m_el[1].mVec128 = m.m_el[1].mVec128;
		m_el[2].mVec128 = m.m_el[2].mVec128;
		return *this;
	}
#else
	/** @brief Copy constructor */
	SIMD_FORCE_INLINE btMatrix3x3(const btMatrix3x3& other)
	{
		m_el[0] = other.m_el[0];
		m_el[1] = other.m_el[1];
		m_el[2] = other.m_el[2];
	}

	/** @brief Assignment Operator */
	SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& other)
	{
		m_el[0] = other.m_el[0];
		m_el[1] = other.m_el[1];
		m_el[2] = other.m_el[2];
		return *this;
	}
#endif
	/** @brief Get a column of the matrix as a vector
	 *  @param i Column number 0 indexed */
	SIMD_FORCE_INLINE btVector3 getColumn(int i) const
	{
		return btVector3(m_el[0][i], m_el[1][i], m_el[2][i]);
	}
	/** @brief Set the matrix from a quaternion
	 *  @param q The Quaternion to match */
	void setRotation(const btQuaternion& q)
	{
		btScalar d = q.length2();
		btFullAssert(d != btScalar(0.0));
		btScalar s = btScalar(2.0) / d;

#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		__m128 vs, Q = q.get128();
		__m128i Qi = btCastfTo128i(Q);
		__m128 Y, Z;
		__m128 V1, V2, V3;
		__m128 V11, V21, V31;
		__m128 NQ = _mm_xor_ps(Q, btvMzeroMask);
		__m128i NQi = btCastfTo128i(NQ);

		V1 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 0, 2, 3)));  //  Y  X  Z  W
		V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0, 0, 1, 3));                 // -X -X  Y  W
		V3 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(2, 1, 0, 3)));  //  Z  Y  X  W
		V1 = _mm_xor_ps(V1, vMPPP);                                         // change the sign of the first element

		V11 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 1, 0, 3)));  //  Y  Y  X  W
		V21 = _mm_unpackhi_ps(Q, Q);                                         //  Z  Z  W  W
		V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0, 2, 0, 3));                 //  X  Z -X -W

		V2 = V2 * V1;
		V1 = V1 * V11;
		V3 = V3 * V31;

		V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2, 3, 1, 3));                // -Z -W  Y  W
		V11 = V11 * V21;
		V21 = _mm_xor_ps(V21, vMPPP);                                       // change the sign of the first element
		V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3, 3, 1, 3));                //  W  W -Y -W
		V31 = _mm_xor_ps(V31, vMPPP);                                       // change the sign of the first element
		Y = btCastiTo128f(_mm_shuffle_epi32(NQi, BT_SHUFFLE(3, 2, 0, 3)));  // -W -Z -X -W
		Z = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 0, 1, 3)));   //  Y  X  Y  W

		vs = _mm_load_ss(&s);
		V21 = V21 * Y;
		V31 = V31 * Z;

		V1 = V1 + V11;
		V2 = V2 + V21;
		V3 = V3 + V31;

		vs = bt_splat3_ps(vs, 0);  // s s s 0

		V1 = V1 * vs;
		V2 = V2 * vs;
		V3 = V3 * vs;

		V1 = V1 + v1000;
		V2 = V2 + v0100;
		V3 = V3 + v0010;

		m_el[0] = btVector3(V1);
		m_el[1] = btVector3(V2);
		m_el[2] = btVector3(V3);
#else
		btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
		btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
		btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
		btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
		setValue(
			btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
			xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
			xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
#endif
	}
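	/* Usage sketch (illustrative, not part of the original header): build a
	   rotation matrix from a unit quaternion and rotate a vector with it.

	       btQuaternion q(btVector3(0, 0, 1), SIMD_HALF_PI);  // 90 degrees about Z
	       btMatrix3x3 m;
	       m.setRotation(q);
	       btVector3 v = m * btVector3(1, 0, 0);  // approximately (0, 1, 0)
	*/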
	/** @brief Set the matrix from euler angles using YPR around YXZ respectively
	 *  @param yaw Yaw about Y axis
	 *  @param pitch Pitch about X axis
	 *  @param roll Roll about Z axis */
	void setEulerYPR(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
	{
		setEulerZYX(roll, pitch, yaw);
	}
	/** @brief Set the matrix from euler angles YPR around ZYX axes
	 *  @param eulerX Roll about X axis
	 *  @param eulerY Pitch around Y axis
	 *  @param eulerZ Yaw about Z axis */
	void setEulerZYX(btScalar eulerX, btScalar eulerY, btScalar eulerZ)
	{
		btScalar ci(btCos(eulerX));
		btScalar cj(btCos(eulerY));
		btScalar ch(btCos(eulerZ));
		btScalar si(btSin(eulerX));
		btScalar sj(btSin(eulerY));
		btScalar sh(btSin(eulerZ));
		btScalar cc = ci * ch;
		btScalar cs = ci * sh;
		btScalar sc = si * ch;
		btScalar ss = si * sh;

		setValue(cj * ch, sj * sc - cs, sj * cc + ss,
				 cj * sh, sj * ss + cc, sj * cs - sc,
				 -sj, cj * si, cj * ci);
	}
	/** @brief Set the matrix to the identity */
	void setIdentity()
	{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
		m_el[0] = btVector3(v1000);
		m_el[1] = btVector3(v0100);
		m_el[2] = btVector3(v0010);
#else
		setValue(btScalar(1.0), btScalar(0.0), btScalar(0.0),
				 btScalar(0.0), btScalar(1.0), btScalar(0.0),
				 btScalar(0.0), btScalar(0.0), btScalar(1.0));
#endif
	}

	static const btMatrix3x3& getIdentity()
	{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
		static const btMatrix3x3 identityMatrix(v1000, v0100, v0010);
#else
		static const btMatrix3x3 identityMatrix(
			btScalar(1.0), btScalar(0.0), btScalar(0.0),
			btScalar(0.0), btScalar(1.0), btScalar(0.0),
			btScalar(0.0), btScalar(0.0), btScalar(1.0));
#endif
		return identityMatrix;
	}
	/** @brief Fill the rotational part of an OpenGL matrix and clear the shear/perspective
	 *  @param m The array to be filled */
	void getOpenGLSubMatrix(btScalar* m) const
	{
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		__m128 v0 = m_el[0].mVec128;
		__m128 v1 = m_el[1].mVec128;
		__m128 v2 = m_el[2].mVec128;  // x2 y2 z2 w2
		__m128* vm = (__m128*)m;
		__m128 vT;

		v2 = _mm_and_ps(v2, btvFFF0fMask);  // x2 y2 z2 0

		vT = _mm_unpackhi_ps(v0, v1);  // z0 z1 * *
		v0 = _mm_unpacklo_ps(v0, v1);  // x0 x1 y0 y1

		v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3));                    // y0 y1 y2 0
		v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3));                    // x0 x1 x2 0
		v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));  // z0 z1 z2 0

		vm[0] = v0;
		vm[1] = v1;
		vm[2] = v2;
#elif defined(BT_USE_NEON)
		// note: zeros the w channel. We assume it is already zero.
		static const uint32x2_t zMask = (const uint32x2_t){static_cast<uint32_t>(-1), 0};
		float32x4_t* vm = (float32x4_t*)m;
		float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128);               // {x0 x1 z0 z1}, {y0 y1 w0 w1}
		float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f));  // {x2 0}, {y2 0}
		float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
		float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
		float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
		float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q);  // z0 z1 z2 0

		vm[0] = v0;
		vm[1] = v1;
		vm[2] = v2;
#else
		m[0] = btScalar(m_el[0].x()); m[1] = btScalar(m_el[1].x()); m[2] = btScalar(m_el[2].x()); m[3] = btScalar(0.0);
		m[4] = btScalar(m_el[0].y()); m[5] = btScalar(m_el[1].y()); m[6] = btScalar(m_el[2].y()); m[7] = btScalar(0.0);
		m[8] = btScalar(m_el[0].z()); m[9] = btScalar(m_el[1].z()); m[10] = btScalar(m_el[2].z()); m[11] = btScalar(0.0);
#endif
	}
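	/* Usage sketch (illustrative): compose a column-major 4x4 OpenGL model
	   matrix from a basis and an origin; getOpenGLSubMatrix writes m[0..11],
	   zeroing entries 3, 7 and 11. 'basis' and 'origin' are hypothetical locals.

	       btScalar gl[16];
	       basis.getOpenGLSubMatrix(gl);
	       gl[12] = origin.x(); gl[13] = origin.y(); gl[14] = origin.z();
	       gl[15] = btScalar(1.0);
	*/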
	/** @brief Get the matrix represented as a quaternion
	 *  @param q The quaternion which will be set */
	void getRotation(btQuaternion& q) const
	{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
		btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
		btScalar s, x;

		union {
			btSimdFloat4 vec;
			btScalar f[4];
		} temp;

		if (trace > btScalar(0.0))
		{
			x = trace + btScalar(1.0);

			temp.f[0] = m_el[2].y() - m_el[1].z();
			temp.f[1] = m_el[0].z() - m_el[2].x();
			temp.f[2] = m_el[1].x() - m_el[0].y();
			temp.f[3] = x;
		}
		else
		{
			int i, j, k;
			if (m_el[0].x() < m_el[1].y())
			{
				if (m_el[1].y() < m_el[2].z())
				{ i = 2; j = 0; k = 1; }
				else
				{ i = 1; j = 2; k = 0; }
			}
			else
			{
				if (m_el[0].x() < m_el[2].z())
				{ i = 2; j = 0; k = 1; }
				else
				{ i = 0; j = 1; k = 2; }
			}

			x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);

			temp.f[3] = (m_el[k][j] - m_el[j][k]);
			temp.f[j] = (m_el[j][i] + m_el[i][j]);
			temp.f[k] = (m_el[k][i] + m_el[i][k]);
			temp.f[i] = x;
		}

		s = btSqrt(x);
		q.set128(temp.vec);
		s = btScalar(0.5) / s;

		q *= s;
#else
		btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();

		btScalar temp[4];

		if (trace > btScalar(0.0))
		{
			btScalar s = btSqrt(trace + btScalar(1.0));
			temp[3] = (s * btScalar(0.5));
			s = btScalar(0.5) / s;

			temp[0] = ((m_el[2].y() - m_el[1].z()) * s);
			temp[1] = ((m_el[0].z() - m_el[2].x()) * s);
			temp[2] = ((m_el[1].x() - m_el[0].y()) * s);
		}
		else
		{
			int i = m_el[0].x() < m_el[1].y() ? (m_el[1].y() < m_el[2].z() ? 2 : 1) : (m_el[0].x() < m_el[2].z() ? 2 : 0);
			int j = (i + 1) % 3;
			int k = (i + 2) % 3;

			btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
			temp[i] = s * btScalar(0.5);
			s = btScalar(0.5) / s;

			temp[3] = (m_el[k][j] - m_el[j][k]) * s;
			temp[j] = (m_el[j][i] + m_el[i][j]) * s;
			temp[k] = (m_el[k][i] + m_el[i][k]) * s;
		}
		q.setValue(temp[0], temp[1], temp[2], temp[3]);
#endif
	}
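	/* Usage sketch (illustrative): for an orthonormal basis, getRotation
	   inverts setRotation up to quaternion sign (q and -q encode the same
	   rotation).

	       btMatrix3x3 m(q0);    // q0 is any unit quaternion
	       btQuaternion q1;
	       m.getRotation(q1);    // q1 == q0 or q1 == -q0
	*/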
	/** @brief Get the matrix represented as euler angles around ZYX
	 *  @param yaw Yaw around Z axis
	 *  @param pitch Pitch around Y axis
	 *  @param roll around X axis
	 *  @param solution_number Which solution of two possible solutions ( 1 or 2) are possible values */
	void getEulerZYX(btScalar& yaw, btScalar& pitch, btScalar& roll, unsigned int solution_number = 1) const
	{
		struct Euler
		{
			btScalar yaw;
			btScalar pitch;
			btScalar roll;
		};

		Euler euler_out;
		Euler euler_out2;  // second solution

		// Check that pitch is not at a singularity
		if (btFabs(m_el[2].x()) >= 1)
		{
			euler_out.yaw = 0;
			euler_out2.yaw = 0;

			// From difference of angles formula
			btScalar delta = btAtan2(m_el[0].x(), m_el[0].z());
			if (m_el[2].x() > 0)  // gimbal locked up
			{
				euler_out.pitch = SIMD_PI / btScalar(2.0);
				euler_out2.pitch = SIMD_PI / btScalar(2.0);
				euler_out.roll = euler_out.pitch + delta;
				euler_out2.roll = euler_out.pitch + delta;
			}
			else  // gimbal locked down
			{
				euler_out.pitch = -SIMD_PI / btScalar(2.0);
				euler_out2.pitch = -SIMD_PI / btScalar(2.0);
				euler_out.roll = -euler_out.pitch + delta;
				euler_out2.roll = -euler_out.pitch + delta;
			}
		}
		else
		{
			euler_out.pitch = -btAsin(m_el[2].x());
			euler_out2.pitch = SIMD_PI - euler_out.pitch;

			euler_out.roll = btAtan2(m_el[2].y() / btCos(euler_out.pitch),
									 m_el[2].z() / btCos(euler_out.pitch));
			euler_out2.roll = btAtan2(m_el[2].y() / btCos(euler_out2.pitch),
									  m_el[2].z() / btCos(euler_out2.pitch));

			euler_out.yaw = btAtan2(m_el[1].x() / btCos(euler_out.pitch),
									m_el[0].x() / btCos(euler_out.pitch));
			euler_out2.yaw = btAtan2(m_el[1].x() / btCos(euler_out2.pitch),
									 m_el[0].x() / btCos(euler_out2.pitch));
		}

		if (solution_number == 1)
		{
			yaw = euler_out.yaw;
			pitch = euler_out.pitch;
			roll = euler_out.roll;
		}
		else
		{
			yaw = euler_out2.yaw;
			pitch = euler_out2.pitch;
			roll = euler_out2.roll;
		}
	}
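	/* Usage sketch (illustrative): Euler round trip. setEulerZYX takes the
	   (x, y, z) rotations; getEulerZYX returns yaw/pitch/roll about Z/Y/X,
	   so away from the pitch = +/-SIMD_PI/2 singularity:

	       btMatrix3x3 m;
	       m.setEulerZYX(btScalar(0.1), btScalar(0.2), btScalar(0.3));
	       btScalar yaw, pitch, roll;
	       m.getEulerZYX(yaw, pitch, roll);  // yaw ~ 0.3, pitch ~ 0.2, roll ~ 0.1
	*/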
	/** @brief Create a scaled copy of the matrix
	 *  @param s Scaling vector The elements of the vector will scale each column */
	btMatrix3x3 scaled(const btVector3& s) const
	{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
		return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
#else
		return btMatrix3x3(
			m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
			m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
			m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
#endif
	}
	btScalar tdotx(const btVector3& v) const
	{
		return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();
	}
	btScalar tdoty(const btVector3& v) const
	{
		return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();
	}
	btScalar tdotz(const btVector3& v) const
	{
		return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
	}
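	/* The tdot helpers dot v against a column of this matrix, i.e. a row of
	   the transpose; the scalar paths of operator*(btVector3, btMatrix3x3)
	   and of the matrix product are built from them. Illustrative equivalence:

	       m.tdotx(v) == m.transpose().getRow(0).dot(v)   // likewise tdoty/tdotz
	*/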
	/** @brief diagonalizes this matrix by the Jacobi method.
	 * @param rot stores the rotation from the coordinate system in which the matrix is diagonal to the original
	 * coordinate system, i.e., old_this = rot * new_this * rot^T.
	 * @param threshold See maxSteps
	 * @param maxSteps The iteration stops when all off-diagonal elements are less than the threshold multiplied
	 * by the sum of the absolute values of the diagonal, or when maxSteps have been executed.
	 *
	 * Note that this matrix is assumed to be symmetric.
	 */
	void diagonalize(btMatrix3x3& rot, btScalar threshold, int maxSteps)
	{
		rot.setIdentity();
		for (int step = maxSteps; step > 0; step--)
		{
			// find off-diagonal element [p][q] with largest magnitude
			int p = 0;
			int q = 1;
			int r = 2;
			btScalar max = btFabs(m_el[0][1]);
			btScalar v = btFabs(m_el[0][2]);
			if (v > max)
			{ q = 2; r = 1; max = v; }
			v = btFabs(m_el[1][2]);
			if (v > max)
			{ p = 1; q = 2; r = 0; max = v; }

			btScalar t = threshold * (btFabs(m_el[0][0]) + btFabs(m_el[1][1]) + btFabs(m_el[2][2]));
			if (max <= t)
			{
				if (max <= SIMD_EPSILON * t)
					return;
				step = 1;
			}

			// compute Jacobi rotation J which leads to a zero for element [p][q]
			btScalar mpq = m_el[p][q];
			btScalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
			btScalar theta2 = theta * theta;
			btScalar cos;
			btScalar sin;
			if (theta2 * theta2 < btScalar(10 / SIMD_EPSILON))
			{
				t = (theta >= 0) ? 1 / (theta + btSqrt(1 + theta2))
								 : 1 / (theta - btSqrt(1 + theta2));
				cos = 1 / btSqrt(1 + t * t);
				sin = cos * t;
			}
			else
			{
				// approximation for large theta-value, i.e., a nearly diagonal matrix
				t = 1 / (theta * (2 + btScalar(0.5) / theta2));
				cos = 1 - btScalar(0.5) * t * t;
				sin = cos * t;
			}

			// apply rotation to matrix (this = J^T * this * J)
			m_el[p][q] = m_el[q][p] = 0;
			m_el[p][p] -= t * mpq;
			m_el[q][q] += t * mpq;
			btScalar mrp = m_el[r][p];
			btScalar mrq = m_el[r][q];
			m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
			m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;

			// apply rotation to rot (rot = rot * J)
			for (int i = 0; i < 3; i++)
			{
				btVector3& row = rot[i];
				mrp = row[p];
				mrq = row[q];
				row[p] = cos * mrp - sin * mrq;
				row[q] = cos * mrq + sin * mrp;
			}
		}
	}
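	/* Usage sketch (illustrative): diagonalize a symmetric matrix such as an
	   inertia tensor. Afterwards *this is (near-)diagonal and
	   old_this == rot * new_this * rot.transpose(). Input values are hypothetical.

	       btMatrix3x3 inertia(2, 1, 0,
	                           1, 2, 0,
	                           0, 0, 3);  // symmetric input
	       btMatrix3x3 rot;
	       inertia.diagonalize(rot, btScalar(1e-5), 16);
	*/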
	/** @brief Calculate the matrix cofactor
	 * @param r1 The first row to use for calculating the cofactor
	 * @param c1 The first column to use for calculating the cofactor
	 * @param r2 The second row to use for calculating the cofactor
	 * @param c2 The second column to use for calculating the cofactor */
	btScalar cofac(int r1, int c1, int r2, int c2) const
	{
		return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
	}
SIMD_FORCE_INLINE btMatrix3x3&
btMatrix3x3::operator*=(const btMatrix3x3& m)
{
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
	__m128 rv00, rv01, rv02;
	__m128 rv10, rv11, rv12;
	__m128 rv20, rv21, rv22;
	__m128 mv0, mv1, mv2;

	rv02 = m_el[0].mVec128;
	rv12 = m_el[1].mVec128;
	rv22 = m_el[2].mVec128;

	mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
	mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
	mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);

	// rv0
	rv00 = bt_splat_ps(rv02, 0);
	rv01 = bt_splat_ps(rv02, 1);
	rv02 = bt_splat_ps(rv02, 2);

	rv00 = _mm_mul_ps(rv00, mv0);
	rv01 = _mm_mul_ps(rv01, mv1);
	rv02 = _mm_mul_ps(rv02, mv2);

	// rv1
	rv10 = bt_splat_ps(rv12, 0);
	rv11 = bt_splat_ps(rv12, 1);
	rv12 = bt_splat_ps(rv12, 2);

	rv10 = _mm_mul_ps(rv10, mv0);
	rv11 = _mm_mul_ps(rv11, mv1);
	rv12 = _mm_mul_ps(rv12, mv2);

	// rv2
	rv20 = bt_splat_ps(rv22, 0);
	rv21 = bt_splat_ps(rv22, 1);
	rv22 = bt_splat_ps(rv22, 2);

	rv20 = _mm_mul_ps(rv20, mv0);
	rv21 = _mm_mul_ps(rv21, mv1);
	rv22 = _mm_mul_ps(rv22, mv2);

	rv00 = _mm_add_ps(rv00, rv01);
	rv10 = _mm_add_ps(rv10, rv11);
	rv20 = _mm_add_ps(rv20, rv21);

	m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
	m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
	m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
#elif defined(BT_USE_NEON)
	float32x4_t rv0, rv1, rv2;
	float32x4_t v0, v1, v2;
	float32x4_t mv0, mv1, mv2;

	v0 = m_el[0].mVec128;
	v1 = m_el[1].mVec128;
	v2 = m_el[2].mVec128;

	mv0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
	mv1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
	mv2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);

	rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
	rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
	rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);

	rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
	rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
	rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);

	rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
	rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
	rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);

	m_el[0].mVec128 = rv0;
	m_el[1].mVec128 = rv1;
	m_el[2].mVec128 = rv2;
#else
	setValue(
		m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
		m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
		m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
#endif
	return *this;
}
SIMD_FORCE_INLINE btMatrix3x3&
btMatrix3x3::operator+=(const btMatrix3x3& m)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128;
	m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128;
	m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128;
#else
	setValue(
		m_el[0][0] + m.m_el[0][0],
		m_el[0][1] + m.m_el[0][1],
		m_el[0][2] + m.m_el[0][2],
		m_el[1][0] + m.m_el[1][0],
		m_el[1][1] + m.m_el[1][1],
		m_el[1][2] + m.m_el[1][2],
		m_el[2][0] + m.m_el[2][0],
		m_el[2][1] + m.m_el[2][1],
		m_el[2][2] + m.m_el[2][2]);
#endif
	return *this;
}
SIMD_FORCE_INLINE btMatrix3x3
operator*(const btMatrix3x3& m, const btScalar& k)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 vk = bt_splat_ps(_mm_load_ss((float*)&k), 0x80);
	return btMatrix3x3(
		_mm_mul_ps(m[0].mVec128, vk),
		_mm_mul_ps(m[1].mVec128, vk),
		_mm_mul_ps(m[2].mVec128, vk));
#elif defined(BT_USE_NEON)
	return btMatrix3x3(
		vmulq_n_f32(m[0].mVec128, k),
		vmulq_n_f32(m[1].mVec128, k),
		vmulq_n_f32(m[2].mVec128, k));
#else
	return btMatrix3x3(
		m[0].x() * k, m[0].y() * k, m[0].z() * k,
		m[1].x() * k, m[1].y() * k, m[1].z() * k,
		m[2].x() * k, m[2].y() * k, m[2].z() * k);
#endif
}
SIMD_FORCE_INLINE btMatrix3x3
operator+(const btMatrix3x3& m1, const btMatrix3x3& m2)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	return btMatrix3x3(
		m1[0].mVec128 + m2[0].mVec128,
		m1[1].mVec128 + m2[1].mVec128,
		m1[2].mVec128 + m2[2].mVec128);
#else
	return btMatrix3x3(
		m1[0][0] + m2[0][0], m1[0][1] + m2[0][1], m1[0][2] + m2[0][2],
		m1[1][0] + m2[1][0], m1[1][1] + m2[1][1], m1[1][2] + m2[1][2],
		m1[2][0] + m2[2][0], m1[2][1] + m2[2][1], m1[2][2] + m2[2][2]);
#endif
}

SIMD_FORCE_INLINE btMatrix3x3
operator-(const btMatrix3x3& m1, const btMatrix3x3& m2)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	return btMatrix3x3(
		m1[0].mVec128 - m2[0].mVec128,
		m1[1].mVec128 - m2[1].mVec128,
		m1[2].mVec128 - m2[2].mVec128);
#else
	return btMatrix3x3(
		m1[0][0] - m2[0][0], m1[0][1] - m2[0][1], m1[0][2] - m2[0][2],
		m1[1][0] - m2[1][0], m1[1][1] - m2[1][1], m1[1][2] - m2[1][2],
		m1[2][0] - m2[2][0], m1[2][1] - m2[2][1], m1[2][2] - m2[2][2]);
#endif
}
SIMD_FORCE_INLINE btMatrix3x3&
btMatrix3x3::operator-=(const btMatrix3x3& m)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128;
	m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128;
	m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128;
#else
	setValue(
		m_el[0][0] - m.m_el[0][0],
		m_el[0][1] - m.m_el[0][1],
		m_el[0][2] - m.m_el[0][2],
		m_el[1][0] - m.m_el[1][0],
		m_el[1][1] - m.m_el[1][1],
		m_el[1][2] - m.m_el[1][2],
		m_el[2][0] - m.m_el[2][0],
		m_el[2][1] - m.m_el[2][1],
		m_el[2][2] - m.m_el[2][2]);
#endif
	return *this;
}
SIMD_FORCE_INLINE btScalar
btMatrix3x3::determinant() const
{
	return btTriple((*this)[0], (*this)[1], (*this)[2]);
}
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::absolute() const
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	return btMatrix3x3(
		_mm_and_ps(m_el[0].mVec128, btvAbsfMask),
		_mm_and_ps(m_el[1].mVec128, btvAbsfMask),
		_mm_and_ps(m_el[2].mVec128, btvAbsfMask));
#elif defined(BT_USE_NEON)
	return btMatrix3x3(
		(float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
		(float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
		(float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
#else
	return btMatrix3x3(
		btFabs(m_el[0].x()), btFabs(m_el[0].y()), btFabs(m_el[0].z()),
		btFabs(m_el[1].x()), btFabs(m_el[1].y()), btFabs(m_el[1].z()),
		btFabs(m_el[2].x()), btFabs(m_el[2].y()), btFabs(m_el[2].z()));
#endif
}
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::transpose() const
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 v0 = m_el[0].mVec128;
	__m128 v1 = m_el[1].mVec128;
	__m128 v2 = m_el[2].mVec128;  // x2 y2 z2 w2
	__m128 vT;

	v2 = _mm_and_ps(v2, btvFFF0fMask);  // x2 y2 z2 0

	vT = _mm_unpackhi_ps(v0, v1);  // z0 z1 * *
	v0 = _mm_unpacklo_ps(v0, v1);  // x0 x1 y0 y1

	v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3));                    // y0 y1 y2 0
	v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3));                    // x0 x1 x2 0
	v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));  // z0 z1 z2 0

	return btMatrix3x3(v0, v1, v2);
#elif defined(BT_USE_NEON)
	// note: zeros the w channel. We assume it is already zero.
	static const uint32x2_t zMask = (const uint32x2_t){static_cast<uint32_t>(-1), 0};
	float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128);               // {x0 x1 z0 z1}, {y0 y1 w0 w1}
	float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f));  // {x2 0}, {y2 0}
	float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
	float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
	float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
	float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q);  // z0 z1 z2 0
	return btMatrix3x3(v0, v1, v2);
#else
	return btMatrix3x3(m_el[0].x(), m_el[1].x(), m_el[2].x(),
					   m_el[0].y(), m_el[1].y(), m_el[2].y(),
					   m_el[0].z(), m_el[1].z(), m_el[2].z());
#endif
}
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::adjoint() const
{
	return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
					   cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
					   cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
}
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::inverse() const
{
	btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
	btScalar det = (*this)[0].dot(co);
	btAssert(det != btScalar(0.0));
	btScalar s = btScalar(1.0) / det;
	return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
					   co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
					   co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
}
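/* Usage sketch (illustrative): for a nonsingular matrix, m * m.inverse()
   reproduces the identity up to floating point error; for a pure rotation
   the transpose is the cheaper inverse.

       btMatrix3x3 m(q);  // q: any unit quaternion, so det(m) == 1
       btMatrix3x3 shouldBeIdentity = m * m.inverse();  // ~getIdentity()
*/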
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::transposeTimes(const btMatrix3x3& m) const
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	// zeros w
	__m128 row = m_el[0].mVec128;
	__m128 m0 = _mm_and_ps(m.getRow(0).mVec128, btvFFF0fMask);
	__m128 m1 = _mm_and_ps(m.getRow(1).mVec128, btvFFF0fMask);
	__m128 m2 = _mm_and_ps(m.getRow(2).mVec128, btvFFF0fMask);
	__m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
	__m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
	__m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
	row = m_el[1].mVec128;
	r0 = _mm_add_ps(r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
	row = m_el[2].mVec128;
	r0 = _mm_add_ps(r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
	return btMatrix3x3(r0, r1, r2);
#elif defined BT_USE_NEON
	// zeros w
	static const uint32x4_t xyzMask = (const uint32x4_t){static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0};
	float32x4_t m0 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(0).mVec128, xyzMask);
	float32x4_t m1 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(1).mVec128, xyzMask);
	float32x4_t m2 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(2).mVec128, xyzMask);
	float32x4_t row = m_el[0].mVec128;
	float32x4_t r0 = vmulq_lane_f32(m0, vget_low_f32(row), 0);
	float32x4_t r1 = vmulq_lane_f32(m0, vget_low_f32(row), 1);
	float32x4_t r2 = vmulq_lane_f32(m0, vget_high_f32(row), 0);
	row = m_el[1].mVec128;
	r0 = vmlaq_lane_f32(r0, m1, vget_low_f32(row), 0);
	r1 = vmlaq_lane_f32(r1, m1, vget_low_f32(row), 1);
	r2 = vmlaq_lane_f32(r2, m1, vget_high_f32(row), 0);
	row = m_el[2].mVec128;
	r0 = vmlaq_lane_f32(r0, m2, vget_low_f32(row), 0);
	r1 = vmlaq_lane_f32(r1, m2, vget_low_f32(row), 1);
	r2 = vmlaq_lane_f32(r2, m2, vget_high_f32(row), 0);
	return btMatrix3x3(r0, r1, r2);
#else
	return btMatrix3x3(
		m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
		m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
		m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
		m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
		m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
		m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
		m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
		m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
		m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
#endif
}
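/* Illustrative equivalences (both products avoid forming an explicit
   transpose in the scalar path):

       A.transposeTimes(B)  ==  A.transpose() * B
       A.timesTranspose(B)  ==  A * B.transpose()
*/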
SIMD_FORCE_INLINE btMatrix3x3
btMatrix3x3::timesTranspose(const btMatrix3x3& m) const
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 a0 = m_el[0].mVec128;
	__m128 a1 = m_el[1].mVec128;
	__m128 a2 = m_el[2].mVec128;

	btMatrix3x3 mT = m.transpose();  // we rely on transpose() zeroing the w channel
	__m128 mx = mT[0].mVec128;
	__m128 my = mT[1].mVec128;
	__m128 mz = mT[2].mVec128;

	__m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
	__m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
	__m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
	r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
	r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
	return btMatrix3x3(r0, r1, r2);
#elif defined BT_USE_NEON
	float32x4_t a0 = m_el[0].mVec128;
	float32x4_t a1 = m_el[1].mVec128;
	float32x4_t a2 = m_el[2].mVec128;

	btMatrix3x3 mT = m.transpose();  // we rely on transpose() zeroing the w channel
	float32x4_t mx = mT[0].mVec128;
	float32x4_t my = mT[1].mVec128;
	float32x4_t mz = mT[2].mVec128;

	float32x4_t r0 = vmulq_lane_f32(mx, vget_low_f32(a0), 0);
	float32x4_t r1 = vmulq_lane_f32(mx, vget_low_f32(a1), 0);
	float32x4_t r2 = vmulq_lane_f32(mx, vget_low_f32(a2), 0);
	r0 = vmlaq_lane_f32(r0, my, vget_low_f32(a0), 1);
	r1 = vmlaq_lane_f32(r1, my, vget_low_f32(a1), 1);
	r2 = vmlaq_lane_f32(r2, my, vget_low_f32(a2), 1);
	r0 = vmlaq_lane_f32(r0, mz, vget_high_f32(a0), 0);
	r1 = vmlaq_lane_f32(r1, mz, vget_high_f32(a1), 0);
	r2 = vmlaq_lane_f32(r2, mz, vget_high_f32(a2), 0);
	return btMatrix3x3(r0, r1, r2);
#else
	return btMatrix3x3(
		m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
		m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
		m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
#endif
}
SIMD_FORCE_INLINE btVector3
operator*(const btMatrix3x3& m, const btVector3& v)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	return v.dot3(m[0], m[1], m[2]);
#else
	return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
#endif
}
SIMD_FORCE_INLINE btVector3
operator*(const btVector3& v, const btMatrix3x3& m)
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	const __m128 vv = v.mVec128;

	__m128 c0 = bt_splat_ps(vv, 0);
	__m128 c1 = bt_splat_ps(vv, 1);
	__m128 c2 = bt_splat_ps(vv, 2);

	c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask));
	c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask));
	c0 = _mm_add_ps(c0, c1);
	c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask));

	return btVector3(_mm_add_ps(c0, c2));
#elif defined(BT_USE_NEON)
	const float32x4_t vv = v.mVec128;
	const float32x2_t vlo = vget_low_f32(vv);
	const float32x2_t vhi = vget_high_f32(vv);

	float32x4_t c0, c1, c2;

	c0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
	c1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
	c2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);

	c0 = vmulq_lane_f32(c0, vlo, 0);
	c1 = vmulq_lane_f32(c1, vlo, 1);
	c2 = vmulq_lane_f32(c2, vhi, 0);
	c0 = vaddq_f32(c0, c1);
	c0 = vaddq_f32(c0, c2);

	return btVector3(c0);
#else
	return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
#endif
}
1223 __m128 m11 = m1[1].mVec128;
1224 __m128 m12 = m1[2].mVec128;
1226 __m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);
1228 __m128 c0 = bt_splat_ps( m10, 0);
1229 __m128 c1 = bt_splat_ps( m11, 0);
1230 __m128 c2 = bt_splat_ps( m12, 0);
1232 c0 = _mm_mul_ps(c0, m2v);
1233 c1 = _mm_mul_ps(c1, m2v);
1234 c2 = _mm_mul_ps(c2, m2v);
1236 m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);
1238 __m128 c0_1 = bt_splat_ps( m10, 1);
1239 __m128 c1_1 = bt_splat_ps( m11, 1);
1240 __m128 c2_1 = bt_splat_ps( m12, 1);
1242 c0_1 = _mm_mul_ps(c0_1, m2v);
1243 c1_1 = _mm_mul_ps(c1_1, m2v);
1244 c2_1 = _mm_mul_ps(c2_1, m2v);
1246 m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);
1248 c0 = _mm_add_ps(c0, c0_1);
1249 c1 = _mm_add_ps(c1, c1_1);
1250 c2 = _mm_add_ps(c2, c2_1);
1252 m10 = bt_splat_ps( m10, 2);
1253 m11 = bt_splat_ps( m11, 2);
1254 m12 = bt_splat_ps( m12, 2);
1256 m10 = _mm_mul_ps(m10, m2v);
1257 m11 = _mm_mul_ps(m11, m2v);
1258 m12 = _mm_mul_ps(m12, m2v);
1260 c0 = _mm_add_ps(c0, m10);
1261 c1 = _mm_add_ps(c1, m11);
1262 c2 = _mm_add_ps(c2, m12);
1266 #elif defined(BT_USE_NEON) 1268 float32x4_t rv0, rv1, rv2;
1269 float32x4_t v0, v1, v2;
1270 float32x4_t mv0, mv1, mv2;
1276 mv0 = (float32x4_t) vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
1277 mv1 = (float32x4_t) vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
1278 mv2 = (float32x4_t) vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);
1280 rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
1281 rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
1282 rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
1284 rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
1285 rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
1286 rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
1288 rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
1289 rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
1290 rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
/** @brief Equality operator between two matrices
 *  It will test all elements are equal. */
SIMD_FORCE_INLINE bool operator==(const btMatrix3x3& m1, const btMatrix3x3& m2)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 c0, c1, c2;

	c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
	c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
	c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);

	c0 = _mm_and_ps(c0, c1);
	c0 = _mm_and_ps(c0, c2);

	// only the x, y and z lanes are significant; mask off the w lane
	int m = _mm_movemask_ps((__m128)c0);
	return (0x7 == (m & 0x7));
#else
	return (m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
			m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
			m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2]);
#endif
}
///for serialization
struct btMatrix3x3FloatData
{
	btVector3FloatData m_el[3];
};

///for serialization
struct btMatrix3x3DoubleData
{
	btVector3DoubleData m_el[3];
};

SIMD_FORCE_INLINE void btMatrix3x3::serialize(struct btMatrix3x3Data& dataOut) const
{
	for (int i = 0; i < 3; i++)
		m_el[i].serialize(dataOut.m_el[i]);
}

SIMD_FORCE_INLINE void btMatrix3x3::serializeFloat(struct btMatrix3x3FloatData& dataOut) const
{
	for (int i = 0; i < 3; i++)
		m_el[i].serializeFloat(dataOut.m_el[i]);
}

SIMD_FORCE_INLINE void btMatrix3x3::deSerialize(const struct btMatrix3x3Data& dataIn)
{
	for (int i = 0; i < 3; i++)
		m_el[i].deSerialize(dataIn.m_el[i]);
}

SIMD_FORCE_INLINE void btMatrix3x3::deSerializeFloat(const struct btMatrix3x3FloatData& dataIn)
{
	for (int i = 0; i < 3; i++)
		m_el[i].deSerializeFloat(dataIn.m_el[i]);
}

SIMD_FORCE_INLINE void btMatrix3x3::deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn)
{
	for (int i = 0; i < 3; i++)
		m_el[i].deSerializeDouble(dataIn.m_el[i]);
}
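/* Usage sketch (illustrative): snapshot a matrix into the plain-data mirror
   used by the serializer and restore it. btMatrix3x3Data aliases the float
   or double variant depending on BT_USE_DOUBLE_PRECISION.

       btMatrix3x3Data data;
       m.serialize(data);       // 'm' is a hypothetical btMatrix3x3
       btMatrix3x3 copy;
       copy.deSerialize(data);  // copy now equals m
*/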
#endif  //BT_MATRIX3x3_H