00001 /*************************************************************************** 00002 * Copyright (C) 1998-2009 by authors (see AUTHORS.txt ) * 00003 * * 00004 * This file is part of LuxRender. * 00005 * * 00006 * Lux Renderer is free software; you can redistribute it and/or modify * 00007 * it under the terms of the GNU General Public License as published by * 00008 * the Free Software Foundation; either version 3 of the License, or * 00009 * (at your option) any later version. * 00010 * * 00011 * Lux Renderer is distributed in the hope that it will be useful, * 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of * 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * 00014 * GNU General Public License for more details. * 00015 * * 00016 * You should have received a copy of the GNU General Public License * 00017 * along with this program. If not, see <http://www.gnu.org/licenses/>. * 00018 * * 00019 * This project is based on PBRT ; see http://www.pbrt.org * 00020 * Lux Renderer website : http://www.luxrender.net * 00021 ***************************************************************************/ 00022 00023 // matrix4x4.cpp* 00024 #include "lux.h" 00025 00026 //#ifndef LUX_USE_SSE 00027 #include "matrix4x4.h" 00028 #include "error.h" 00029 00030 namespace lux 00031 { 00032 00033 Matrix4x4::Matrix4x4(float mat[4][4]) 00034 { 00035 memcpy(m, mat, 16 * sizeof(float)); 00036 } 00037 00038 Matrix4x4::Matrix4x4(float t00, float t01, float t02, float t03, 00039 float t10, float t11, float t12, float t13, 00040 float t20, float t21, float t22, float t23, 00041 float t30, float t31, float t32, float t33) 00042 { 00043 m[0][0] = t00; m[0][1] = t01; m[0][2] = t02; m[0][3] = t03; 00044 m[1][0] = t10; m[1][1] = t11; m[1][2] = t12; m[1][3] = t13; 00045 m[2][0] = t20; m[2][1] = t21; m[2][2] = t22; m[2][3] = t23; 00046 m[3][0] = t30; m[3][1] = t31; m[3][2] = t32; m[3][3] = t33; 00047 } 00048 00049 boost::shared_ptr<Matrix4x4> Matrix4x4::Transpose() const 00050 { 00051 boost::shared_ptr<Matrix4x4> o(new Matrix4x4(m[0][0], m[1][0], m[2][0], m[3][0], 00052 m[0][1], m[1][1], m[2][1], m[3][1], 00053 m[0][2], m[1][2], m[2][2], m[3][2], 00054 m[0][3], m[1][3], m[2][3], m[3][3])); 00055 return o; 00056 } 00057 00058 // TODO - lordcrc - move to proper header file 00059 float Det2x2(const float a00, const float a01, const float a10, const float a11) { 00060 return a00*a11 - a01*a10; 00061 } 00062 00063 // TODO - lordcrc - move to proper header file 00064 float Det3x3(float A[3][3]) { 00065 return 00066 A[0][0] * Det2x2(A[1][1], A[1][2], A[2][1], A[2][2]) - 00067 A[0][1] * Det2x2(A[1][0], A[1][2], A[2][0], A[2][2]) + 00068 A[0][2] * Det2x2(A[1][0], A[1][1], A[2][0], A[2][1]); 00069 } 00070 00071 float Matrix4x4::Determinant() const { 00072 00073 // row expansion along the last row 00074 // for most matrices this would be most efficient 00075 00076 float result = 0; 00077 float s = -1; 00078 00079 float A[3][3]; 00080 00081 // initialize for first expansion 00082 for (int i = 0; i < 3; i++) { 00083 for (int j = 0; j < 3; j++) 00084 A[i][j] = m[i][j+1]; 00085 } 00086 00087 int k = 0; 00088 00089 while (true) { 00090 if (m[3][k] != 0.f) 00091 result += s * m[3][k] * Det3x3(A); 00092 00093 // we're done 00094 if (k >= 3) 00095 break; 00096 00097 s *= -1; 00098 00099 // copy column for next expansion 00100 for (int i = 0; i < 3; i++) 00101 A[i][k] = m[i][k]; 00102 00103 k++; 00104 } 00105 00106 return result; 00107 } 00108 00109 boost::shared_ptr<Matrix4x4> Matrix4x4::Inverse() const 00110 { 00111 int indxc[4], indxr[4]; 00112 int ipiv[4] = { 0, 0, 0, 0 }; 00113 float minv[4][4]; 00114 memcpy(minv, m, 4 * 4 * sizeof(float)); 00115 for (int i = 0; i < 4; ++i) { 00116 int irow = -1, icol = -1; 00117 float big = 0.; 00118 // Choose pivot 00119 for (int j = 0; j < 4; ++j) { 00120 if (ipiv[j] != 1) { 00121 for (int k = 0; k < 4; ++k) { 00122 if (ipiv[k] == 0) { 00123 if (fabsf(minv[j][k]) >= big) { 00124 big = fabsf(minv[j][k]); 00125 irow = j; 00126 icol = k; 00127 } 00128 } else if (ipiv[k] > 1) 00129 luxError(LUX_MATH, LUX_ERROR, "Singular matrix in MatrixInvert"); 00130 } 00131 } 00132 } 00133 ++ipiv[icol]; 00134 // Swap rows _irow_ and _icol_ for pivot 00135 if (irow != icol) { 00136 for (int k = 0; k < 4; ++k) 00137 swap(minv[irow][k], minv[icol][k]); 00138 } 00139 indxr[i] = irow; 00140 indxc[i] = icol; 00141 if (minv[icol][icol] == 0.) 00142 luxError(LUX_MATH, LUX_ERROR, "Singular matrix in MatrixInvert"); 00143 // Set $m[icol][icol]$ to one by scaling row _icol_ appropriately 00144 float pivinv = 1.f / minv[icol][icol]; 00145 minv[icol][icol] = 1.f; 00146 for (int j = 0; j < 4; ++j) 00147 minv[icol][j] *= pivinv; 00148 // Subtract this row from others to zero out their columns 00149 for (int j = 0; j < 4; ++j) { 00150 if (j != icol) { 00151 float save = minv[j][icol]; 00152 minv[j][icol] = 0; 00153 for (int k = 0; k < 4; ++k) 00154 minv[j][k] -= minv[icol][k] * save; 00155 } 00156 } 00157 } 00158 // Swap columns to reflect permutation 00159 for (int j = 3; j >= 0; --j) { 00160 if (indxr[j] != indxc[j]) { 00161 for (int k = 0; k < 4; ++k) 00162 swap(minv[k][indxr[j]], minv[k][indxc[j]]); 00163 } 00164 } 00165 //return new Matrix4x4(minv); 00166 boost::shared_ptr<Matrix4x4> o(new Matrix4x4(minv)); 00167 return o; 00168 } 00169 00170 } 00171 00172 /*#else // LUX_USE_SSE 00173 00174 #include "matrix4x4-sse.h" 00175 00176 typedef long int32; //!Jeanphi: replaced int by long. Should use POSIX types 00177 #ifdef __GNUC__ 00178 const float __attribute__ ((aligned(16))) _F1001[4] = { 1.0f, 0.0f, 0.0f, 1.0f }; 00179 const int32 __attribute__ ((aligned(16))) _Sign_PNNP[4] = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 }; 00180 #else //intel compiler 00181 const _MM_ALIGN16 float _F1001[4] = { 1.0f, 0.0f, 0.0f, 1.0f }; 00182 const _MM_ALIGN16 __int32 _Sign_PNNP[4] = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 }; 00183 #endif 00184 #define Sign_PNNP (*(__m128*)&_Sign_PNNP) // + - - + 00185 00186 namespace lux 00187 { 00188 00189 Matrix4x4::Matrix4x4(float mat[4][4]) 00190 { 00191 _L1 = _mm_set_ps(mat[3][0], mat[2][0], mat[1][0], mat[0][0]); 00192 _L2 = _mm_set_ps(mat[3][1], mat[2][1], mat[1][1], mat[0][1]); 00193 _L3 = _mm_set_ps(mat[3][2], mat[2][2], mat[1][2], mat[0][2]); 00194 _L4 = _mm_set_ps(mat[3][3], mat[2][3], mat[1][3], mat[0][3]); 00195 } 00196 00197 Matrix4x4::Matrix4x4(float f11, float f12, float f13, float f14, 00198 float f21, float f22, float f23, float f24, 00199 float f31, float f32, float f33, float f34, 00200 float f41, float f42, float f43, float f44) 00201 { 00202 _L1 = _mm_set_ps(f41, f31, f21, f11); 00203 _L2 = _mm_set_ps(f42, f32, f22, f12); 00204 _L3 = _mm_set_ps(f43, f33, f23, f13); 00205 _L4 = _mm_set_ps(f44, f34, f24, f14); 00206 } 00207 00208 boost::shared_ptr<Matrix4x4> Matrix4x4::Transpose() const 00209 { 00210 __m128 xmm0 = _mm_unpacklo_ps(_L1, _L2), 00211 xmm1 = _mm_unpacklo_ps(_L3, _L4), 00212 xmm2 = _mm_unpackhi_ps(_L1, _L2), 00213 xmm3 = _mm_unpackhi_ps(_L3, _L4); 00214 00215 __m128 L1, L2, L3, L4; 00216 L1 = _mm_movelh_ps(xmm0, xmm1); 00217 L2 = _mm_movehl_ps(xmm1, xmm0); 00218 L3 = _mm_movelh_ps(xmm2, xmm3); 00219 L4 = _mm_movehl_ps(xmm3, xmm2); 00220 00221 boost::shared_ptr<Matrix4x4> o(new Matrix4x4(L1, L2, L3, L4)); 00222 return o; 00223 } 00224 00225 boost::shared_ptr<Matrix4x4> Matrix4x4::Inverse() const 00226 { 00227 __m128 A = _mm_movelh_ps(_L1, _L2), // the four sub-matrices 00228 B = _mm_movehl_ps(_L2, _L1), 00229 C = _mm_movelh_ps(_L3, _L4), 00230 D = _mm_movehl_ps(_L4, _L3); 00231 __m128 iA, iB, iC, iD, // partial inverse of the sub-matrices 00232 DC, AB; 00233 __m128 dA, dB, dC, dD; // determinant of the sub-matrices 00234 __m128 det, d, d1, d2; 00235 __m128 rd; 00236 00237 // AB = A# * B 00238 AB = _mm_mul_ps(_mm_shuffle_ps(A, A, 0x0F), B); 00239 AB = _mm_sub_ps(AB, _mm_mul_ps(_mm_shuffle_ps(A, A, 0xA5), 00240 _mm_shuffle_ps(B, B, 0x4E))); 00241 // DC = D# * C 00242 DC = _mm_mul_ps(_mm_shuffle_ps(D, D, 0x0F), C); 00243 DC = _mm_sub_ps(DC, _mm_mul_ps(_mm_shuffle_ps(D, D, 0xA5), 00244 _mm_shuffle_ps(C, C, 0x4E))); 00245 00246 // dA = |A| 00247 dA = _mm_mul_ps(_mm_shuffle_ps(A, A, 0x5F), A); 00248 dA = _mm_sub_ss(dA, _mm_movehl_ps(dA, dA)); 00249 // dB = |B| 00250 dB = _mm_mul_ps(_mm_shuffle_ps(B, B, 0x5F), B); 00251 dB = _mm_sub_ss(dB, _mm_movehl_ps(dB, dB)); 00252 00253 // dC = |C| 00254 dC = _mm_mul_ps(_mm_shuffle_ps(C, C, 0x5F), C); 00255 dC = _mm_sub_ss(dC, _mm_movehl_ps(dC, dC)); 00256 // dD = |D| 00257 dD = _mm_mul_ps(_mm_shuffle_ps(D, D, 0x5F), D); 00258 dD = _mm_sub_ss(dD, _mm_movehl_ps(dD, dD)); 00259 00260 // d = trace(AB*DC) = trace(A#*B*D#*C) 00261 d = _mm_mul_ps(_mm_shuffle_ps(DC, DC, 0xD8), AB); 00262 00263 // iD = C*A#*B 00264 iD = _mm_mul_ps(_mm_shuffle_ps(C, C, 0xA0), _mm_movelh_ps(AB, AB)); 00265 iD = _mm_add_ps(iD, _mm_mul_ps(_mm_shuffle_ps(C, C, 0xF5), 00266 _mm_movehl_ps(AB, AB))); 00267 // iA = B*D#*C 00268 iA = _mm_mul_ps(_mm_shuffle_ps(B, B, 0xA0), _mm_movelh_ps(DC, DC)); 00269 iA = _mm_add_ps(iA, _mm_mul_ps(_mm_shuffle_ps(B, B, 0xF5), 00270 _mm_movehl_ps(DC, DC))); 00271 00272 // d = trace(AB*DC) = trace(A#*B*D#*C) [continue] 00273 d = _mm_add_ps(d, _mm_movehl_ps(d, d)); 00274 d = _mm_add_ss(d, _mm_shuffle_ps(d, d, 1)); 00275 d1 = _mm_mul_ps(dA, dD); 00276 d2 = _mm_mul_ps(dB, dC); 00277 00278 // iD = D*|A| - C*A#*B 00279 iD = _mm_sub_ps(_mm_mul_ps(D, _mm_shuffle_ps(dA, dA, 0)), iD); 00280 00281 // iA = A*|D| - B*D#*C; 00282 iA = _mm_sub_ps(_mm_mul_ps(A, _mm_shuffle_ps(dD, dD, 0)), iA); 00283 00284 // det = |A|*|D| + |B|*|C| - trace(A#*B*D#*C) 00285 det = _mm_sub_ps(_mm_add_ps(d1, d2), d); 00286 rd = (__m128)(_mm_div_ps(_mm_set_ss(1.0f), det)); 00287 #ifdef ZERO_SINGULAR 00288 rd = _mm_and_ps(_mm_cmpneq_ss(det, _mm_setzero_ps()), rd); 00289 #endif 00290 00291 // iB = D * (A#B)# = D*B#*A 00292 iB = _mm_mul_ps(D, _mm_shuffle_ps(AB, AB, 0x33)); 00293 iB = _mm_sub_ps(iB, _mm_mul_ps(_mm_shuffle_ps(D, D, 0xB1), 00294 _mm_shuffle_ps(AB, AB, 0x66))); 00295 // iC = A * (D#C)# = A*C#*D 00296 iC = _mm_mul_ps(A, _mm_shuffle_ps(DC, DC, 0x33)); 00297 iC = _mm_sub_ps(iC, _mm_mul_ps(_mm_shuffle_ps(A, A, 0xB1), 00298 _mm_shuffle_ps(DC, DC, 0x66))); 00299 00300 rd = _mm_shuffle_ps(rd, rd, 0); 00301 rd = _mm_xor_ps(rd, Sign_PNNP); 00302 00303 // iB = C*|B| - D*B#*A 00304 iB = _mm_sub_ps(_mm_mul_ps(C, _mm_shuffle_ps(dB, dB, 0)), iB); 00305 00306 // iC = B*|C| - A*C#*D; 00307 iC = _mm_sub_ps(_mm_mul_ps(B, _mm_shuffle_ps(dC, dC, 0)) ,iC); 00308 00309 // iX = iX / det 00310 iA =_mm_mul_ps(iA, rd); 00311 iB =_mm_mul_ps(iB, rd); 00312 iC =_mm_mul_ps(iC, rd); 00313 iD =_mm_mul_ps(iD, rd); 00314 00315 //return new Matrix4x4(_mm_shuffle_ps(iA,iB,0x77),_mm_shuffle_ps(iA,iB,0x22),_mm_shuffle_ps(iC,iD,0x77),_mm_shuffle_ps(iC,iD,0x22)); 00316 00317 boost::shared_ptr<Matrix4x4> 00318 o(new Matrix4x4(_mm_shuffle_ps(iA, iB, 0x77), 00319 _mm_shuffle_ps(iA, iB, 0x22), 00320 _mm_shuffle_ps(iC, iD, 0x77), 00321 _mm_shuffle_ps(iC, iD, 0x22))); 00322 return o; 00323 } 00324 00325 } 00326 00327 #endif // LUX_USE_SSE */ 00328 00329 00330