$treeview $search $mathjax
Eigen
3.2.5
$projectbrief
|
$projectbrief
|
$searchbox |
00001 // This file is part of Eigen, a lightweight C++ template library 00002 // for linear algebra. 00003 // 00004 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr> 00005 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com> 00006 // 00007 // This Source Code Form is subject to the terms of the Mozilla 00008 // Public License v. 2.0. If a copy of the MPL was not distributed 00009 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00010 00011 #ifndef EIGEN_REDUX_H 00012 #define EIGEN_REDUX_H 00013 00014 namespace Eigen { 00015 00016 namespace internal { 00017 00018 // TODO 00019 // * implement other kind of vectorization 00020 // * factorize code 00021 00022 /*************************************************************************** 00023 * Part 1 : the logic deciding a strategy for vectorization and unrolling 00024 ***************************************************************************/ 00025 00026 template<typename Func, typename Derived> 00027 struct redux_traits 00028 { 00029 public: 00030 enum { 00031 PacketSize = packet_traits<typename Derived::Scalar>::size, 00032 InnerMaxSize = int(Derived::IsRowMajor) 00033 ? Derived::MaxColsAtCompileTime 00034 : Derived::MaxRowsAtCompileTime 00035 }; 00036 00037 enum { 00038 MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit) 00039 && (functor_traits<Func>::PacketAccess), 00040 MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit), 00041 MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize 00042 }; 00043 00044 public: 00045 enum { 00046 Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal) 00047 : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) 00048 : int(DefaultTraversal) 00049 }; 00050 00051 public: 00052 enum { 00053 Cost = ( Derived::SizeAtCompileTime == Dynamic 00054 || Derived::CoeffReadCost == Dynamic 00055 || (Derived::SizeAtCompileTime!=1 && functor_traits<Func>::Cost == Dynamic) 00056 ) ? Dynamic 00057 : Derived::SizeAtCompileTime * Derived::CoeffReadCost 00058 + (Derived::SizeAtCompileTime-1) * functor_traits<Func>::Cost, 00059 UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) 00060 }; 00061 00062 public: 00063 enum { 00064 Unrolling = Cost != Dynamic && Cost <= UnrollingLimit 00065 ? CompleteUnrolling 00066 : NoUnrolling 00067 }; 00068 }; 00069 00070 /*************************************************************************** 00071 * Part 2 : unrollers 00072 ***************************************************************************/ 00073 00074 /*** no vectorization ***/ 00075 00076 template<typename Func, typename Derived, int Start, int Length> 00077 struct redux_novec_unroller 00078 { 00079 enum { 00080 HalfLength = Length/2 00081 }; 00082 00083 typedef typename Derived::Scalar Scalar; 00084 00085 static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func) 00086 { 00087 return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func), 00088 redux_novec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func)); 00089 } 00090 }; 00091 00092 template<typename Func, typename Derived, int Start> 00093 struct redux_novec_unroller<Func, Derived, Start, 1> 00094 { 00095 enum { 00096 outer = Start / Derived::InnerSizeAtCompileTime, 00097 inner = Start % Derived::InnerSizeAtCompileTime 00098 }; 00099 00100 typedef typename Derived::Scalar Scalar; 00101 00102 static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&) 00103 { 00104 return mat.coeffByOuterInner(outer, inner); 00105 } 00106 }; 00107 00108 // This is actually dead code and will never be called. It is required 00109 // to prevent false warnings regarding failed inlining though 00110 // for 0 length run() will never be called at all. 00111 template<typename Func, typename Derived, int Start> 00112 struct redux_novec_unroller<Func, Derived, Start, 0> 00113 { 00114 typedef typename Derived::Scalar Scalar; 00115 static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); } 00116 }; 00117 00118 /*** vectorization ***/ 00119 00120 template<typename Func, typename Derived, int Start, int Length> 00121 struct redux_vec_unroller 00122 { 00123 enum { 00124 PacketSize = packet_traits<typename Derived::Scalar>::size, 00125 HalfLength = Length/2 00126 }; 00127 00128 typedef typename Derived::Scalar Scalar; 00129 typedef typename packet_traits<Scalar>::type PacketScalar; 00130 00131 static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func& func) 00132 { 00133 return func.packetOp( 00134 redux_vec_unroller<Func, Derived, Start, HalfLength>::run(mat,func), 00135 redux_vec_unroller<Func, Derived, Start+HalfLength, Length-HalfLength>::run(mat,func) ); 00136 } 00137 }; 00138 00139 template<typename Func, typename Derived, int Start> 00140 struct redux_vec_unroller<Func, Derived, Start, 1> 00141 { 00142 enum { 00143 index = Start * packet_traits<typename Derived::Scalar>::size, 00144 outer = index / int(Derived::InnerSizeAtCompileTime), 00145 inner = index % int(Derived::InnerSizeAtCompileTime), 00146 alignment = (Derived::Flags & AlignedBit) ? Aligned : Unaligned 00147 }; 00148 00149 typedef typename Derived::Scalar Scalar; 00150 typedef typename packet_traits<Scalar>::type PacketScalar; 00151 00152 static EIGEN_STRONG_INLINE PacketScalar run(const Derived &mat, const Func&) 00153 { 00154 return mat.template packetByOuterInner<alignment>(outer, inner); 00155 } 00156 }; 00157 00158 /*************************************************************************** 00159 * Part 3 : implementation of all cases 00160 ***************************************************************************/ 00161 00162 template<typename Func, typename Derived, 00163 int Traversal = redux_traits<Func, Derived>::Traversal, 00164 int Unrolling = redux_traits<Func, Derived>::Unrolling 00165 > 00166 struct redux_impl; 00167 00168 template<typename Func, typename Derived> 00169 struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling> 00170 { 00171 typedef typename Derived::Scalar Scalar; 00172 typedef typename Derived::Index Index; 00173 static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) 00174 { 00175 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); 00176 Scalar res; 00177 res = mat.coeffByOuterInner(0, 0); 00178 for(Index i = 1; i < mat.innerSize(); ++i) 00179 res = func(res, mat.coeffByOuterInner(0, i)); 00180 for(Index i = 1; i < mat.outerSize(); ++i) 00181 for(Index j = 0; j < mat.innerSize(); ++j) 00182 res = func(res, mat.coeffByOuterInner(i, j)); 00183 return res; 00184 } 00185 }; 00186 00187 template<typename Func, typename Derived> 00188 struct redux_impl<Func,Derived, DefaultTraversal, CompleteUnrolling> 00189 : public redux_novec_unroller<Func,Derived, 0, Derived::SizeAtCompileTime> 00190 {}; 00191 00192 template<typename Func, typename Derived> 00193 struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling> 00194 { 00195 typedef typename Derived::Scalar Scalar; 00196 typedef typename packet_traits<Scalar>::type PacketScalar; 00197 typedef typename Derived::Index Index; 00198 00199 static Scalar run(const Derived& mat, const Func& func) 00200 { 00201 const Index size = mat.size(); 00202 eigen_assert(size && "you are using an empty matrix"); 00203 const Index packetSize = packet_traits<Scalar>::size; 00204 const Index alignedStart = internal::first_aligned(mat); 00205 enum { 00206 alignment = bool(Derived::Flags & DirectAccessBit) || bool(Derived::Flags & AlignedBit) 00207 ? Aligned : Unaligned 00208 }; 00209 const Index alignedSize2 = ((size-alignedStart)/(2*packetSize))*(2*packetSize); 00210 const Index alignedSize = ((size-alignedStart)/(packetSize))*(packetSize); 00211 const Index alignedEnd2 = alignedStart + alignedSize2; 00212 const Index alignedEnd = alignedStart + alignedSize; 00213 Scalar res; 00214 if(alignedSize) 00215 { 00216 PacketScalar packet_res0 = mat.template packet<alignment>(alignedStart); 00217 if(alignedSize>packetSize) // we have at least two packets to partly unroll the loop 00218 { 00219 PacketScalar packet_res1 = mat.template packet<alignment>(alignedStart+packetSize); 00220 for(Index index = alignedStart + 2*packetSize; index < alignedEnd2; index += 2*packetSize) 00221 { 00222 packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(index)); 00223 packet_res1 = func.packetOp(packet_res1, mat.template packet<alignment>(index+packetSize)); 00224 } 00225 00226 packet_res0 = func.packetOp(packet_res0,packet_res1); 00227 if(alignedEnd>alignedEnd2) 00228 packet_res0 = func.packetOp(packet_res0, mat.template packet<alignment>(alignedEnd2)); 00229 } 00230 res = func.predux(packet_res0); 00231 00232 for(Index index = 0; index < alignedStart; ++index) 00233 res = func(res,mat.coeff(index)); 00234 00235 for(Index index = alignedEnd; index < size; ++index) 00236 res = func(res,mat.coeff(index)); 00237 } 00238 else // too small to vectorize anything. 00239 // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. 00240 { 00241 res = mat.coeff(0); 00242 for(Index index = 1; index < size; ++index) 00243 res = func(res,mat.coeff(index)); 00244 } 00245 00246 return res; 00247 } 00248 }; 00249 00250 template<typename Func, typename Derived> 00251 struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling> 00252 { 00253 typedef typename Derived::Scalar Scalar; 00254 typedef typename packet_traits<Scalar>::type PacketScalar; 00255 typedef typename Derived::Index Index; 00256 00257 static Scalar run(const Derived& mat, const Func& func) 00258 { 00259 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); 00260 const Index innerSize = mat.innerSize(); 00261 const Index outerSize = mat.outerSize(); 00262 enum { 00263 packetSize = packet_traits<Scalar>::size 00264 }; 00265 const Index packetedInnerSize = ((innerSize)/packetSize)*packetSize; 00266 Scalar res; 00267 if(packetedInnerSize) 00268 { 00269 PacketScalar packet_res = mat.template packet<Unaligned>(0,0); 00270 for(Index j=0; j<outerSize; ++j) 00271 for(Index i=(j==0?packetSize:0); i<packetedInnerSize; i+=Index(packetSize)) 00272 packet_res = func.packetOp(packet_res, mat.template packetByOuterInner<Unaligned>(j,i)); 00273 00274 res = func.predux(packet_res); 00275 for(Index j=0; j<outerSize; ++j) 00276 for(Index i=packetedInnerSize; i<innerSize; ++i) 00277 res = func(res, mat.coeffByOuterInner(j,i)); 00278 } 00279 else // too small to vectorize anything. 00280 // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. 00281 { 00282 res = redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>::run(mat, func); 00283 } 00284 00285 return res; 00286 } 00287 }; 00288 00289 template<typename Func, typename Derived> 00290 struct redux_impl<Func, Derived, LinearVectorizedTraversal, CompleteUnrolling> 00291 { 00292 typedef typename Derived::Scalar Scalar; 00293 typedef typename packet_traits<Scalar>::type PacketScalar; 00294 enum { 00295 PacketSize = packet_traits<Scalar>::size, 00296 Size = Derived::SizeAtCompileTime, 00297 VectorizedSize = (Size / PacketSize) * PacketSize 00298 }; 00299 static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func) 00300 { 00301 eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix"); 00302 Scalar res = func.predux(redux_vec_unroller<Func, Derived, 0, Size / PacketSize>::run(mat,func)); 00303 if (VectorizedSize != Size) 00304 res = func(res,redux_novec_unroller<Func, Derived, VectorizedSize, Size-VectorizedSize>::run(mat,func)); 00305 return res; 00306 } 00307 }; 00308 00309 } // end namespace internal 00310 00311 /*************************************************************************** 00312 * Part 4 : public API 00313 ***************************************************************************/ 00314 00315 00323 template<typename Derived> 00324 template<typename Func> 00325 EIGEN_STRONG_INLINE typename internal::result_of<Func(typename internal::traits<Derived>::Scalar)>::type 00326 DenseBase<Derived>::redux(const Func& func) const 00327 { 00328 typedef typename internal::remove_all<typename Derived::Nested>::type ThisNested; 00329 return internal::redux_impl<Func, ThisNested> 00330 ::run(derived(), func); 00331 } 00332 00336 template<typename Derived> 00337 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 00338 DenseBase<Derived>::minCoeff() const 00339 { 00340 return this->redux(Eigen::internal::scalar_min_op<Scalar>()); 00341 } 00342 00346 template<typename Derived> 00347 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 00348 DenseBase<Derived>::maxCoeff() const 00349 { 00350 return this->redux(Eigen::internal::scalar_max_op<Scalar>()); 00351 } 00352 00357 template<typename Derived> 00358 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 00359 DenseBase<Derived>::sum() const 00360 { 00361 if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) 00362 return Scalar(0); 00363 return this->redux(Eigen::internal::scalar_sum_op<Scalar>()); 00364 } 00365 00370 template<typename Derived> 00371 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 00372 DenseBase<Derived>::mean() const 00373 { 00374 return Scalar(this->redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size()); 00375 } 00376 00384 template<typename Derived> 00385 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 00386 DenseBase<Derived>::prod() const 00387 { 00388 if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) 00389 return Scalar(1); 00390 return this->redux(Eigen::internal::scalar_product_op<Scalar>()); 00391 } 00392 00399 template<typename Derived> 00400 EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar 00401 MatrixBase<Derived>::trace() const 00402 { 00403 return derived().diagonal().sum(); 00404 } 00405 00406 } // end namespace Eigen 00407 00408 #endif // EIGEN_REDUX_H