glue_plus_meat.hpp

Go to the documentation of this file.
00001 // Copyright (C) 2009 NICTA
00002 // 
00003 // Authors:
00004 // - Conrad Sanderson (conradsand at ieee dot org)
00005 // 
00006 // This file is part of the Armadillo C++ library.
00007 // It is provided without any warranty of fitness
00008 // for any purpose. You can redistribute this file
00009 // and/or modify it under the terms of the GNU
00010 // Lesser General Public License (LGPL) as published
00011 // by the Free Software Foundation, either version 3
00012 // of the License or (at your option) any later version.
00013 // (see http://www.opensource.org/licenses for more info)
00014 
00015 
00016 //! \addtogroup glue_plus
00017 //! @{
00018 
00019 
00020 
00021 template<typename eT>
00022 inline
00023 void
00024 glue_plus::apply(Mat<eT>& out, const Mat<eT>& A, const Mat<eT>& B)
00025   {
00026   arma_extra_debug_sigprint();
00027   
00028   arma_debug_assert_same_size(A, B, "matrix addition");
00029   
00030   // no aliasing problem
00031   out.set_size(A.n_rows, A.n_cols);
00032   
00033         eT* out_mem = out.memptr();
00034   const eT* A_mem   = A.mem;
00035   const eT* B_mem   = B.mem;
00036     
00037   const u32 n_elem  = out.n_elem;
00038   
00039   for(u32 i=0; i<n_elem; ++i)
00040     {
00041     out_mem[i] = A_mem[i] + B_mem[i];
00042     }
00043     
00044   }
00045 
00046 
00047 
00048 template<typename eT>
00049 inline
00050 void
00051 glue_plus::apply(Mat<eT>& out, const Mat<eT>& A, const Mat<eT>& B, const Mat<eT>& C)
00052   {
00053   arma_extra_debug_sigprint();
00054   
00055   arma_debug_assert_same_size(A, B, "matrix addition");
00056   arma_debug_assert_same_size(A, C, "matrix addition");
00057   
00058   // no aliasing problem
00059   out.set_size(A.n_rows, A.n_cols);
00060     
00061         eT* out_mem = out.memptr();
00062   const eT* A_mem   = A.mem;
00063   const eT* B_mem   = B.mem;
00064   const eT* C_mem   = C.mem;
00065   
00066   const u32 n_elem  = A.n_elem;
00067   
00068   for(u32 i=0; i<n_elem; ++i)
00069     {
00070     out_mem[i] = A_mem[i] + B_mem[i] + C_mem[i];
00071     }
00072     
00073   }
00074 
00075 
00076 
00077 template<typename eT>
00078 inline
00079 void
00080 glue_plus::apply(Mat<eT>& out, const Glue<Mat<eT>,Mat<eT>,glue_plus>& X)
00081   {
00082   glue_plus::apply(out, X.A, X.B);
00083   }
00084 
00085 
00086 
00087 template<typename eT>
00088 inline
00089 void
00090 glue_plus::apply(Mat<eT>& out, const Glue< Glue<Mat<eT>,Mat<eT>,glue_plus>, Mat<eT>, glue_plus>& X)
00091   {
00092   glue_plus::apply(out, X.A.A, X.A.B, X.B);
00093   }
00094 
00095 
00096 
00097 template<typename T1, typename T2>
00098 inline
00099 void
00100 glue_plus::apply(Mat<typename T1::elem_type>& out, const Glue<T1,T2,glue_plus>& X)
00101   {
00102   arma_extra_debug_sigprint();
00103   
00104   typedef typename T1::elem_type eT;
00105   
00106   const u32 N_mat = 1 + depth_lhs< glue_plus, Glue<T1,T2,glue_plus> >::num;
00107   arma_extra_debug_print( arma_boost::format("N_mat = %d") % N_mat );
00108 
00109   if(N_mat == 2)
00110     {
00111     if(is_glue_times<T1>::value == true)
00112       {
00113       out = X.B;
00114       glue_plus::apply_inplace(out, X.A);
00115       }
00116     else
00117     if(is_glue_times<T2>::value == true)
00118       {
00119       out = X.A;
00120       glue_plus::apply_inplace(out, X.B);
00121       }
00122     else
00123       {
00124       const unwrap<T1> tmp1(X.A);
00125       const unwrap<T2> tmp2(X.B);
00126       
00127       glue_plus::apply(out, tmp1.M, tmp2.M);
00128       }
00129     }
00130   else
00131     {
00132     const Mat<eT>* ptrs[N_mat];
00133     bool            del[N_mat];
00134 
00135     mat_ptrs<glue_plus, Glue<T1,T2,glue_plus> >::get_ptrs(ptrs, del, X);
00136 
00137     for(u32 i=0; i<N_mat; ++i)  arma_extra_debug_print( arma_boost::format("ptrs[%d] = %x") % i % ptrs[i] );
00138     for(u32 i=0; i<N_mat; ++i)  arma_extra_debug_print( arma_boost::format(" del[%d] = %d") % i %  del[i] );
00139 
00140     const u32 n_rows = ptrs[0]->n_rows;
00141     const u32 n_cols = ptrs[0]->n_cols;
00142   
00143     const Mat<eT>& tmp_mat = *(ptrs[0]);
00144     
00145     for(u32 i=1; i<N_mat; ++i)
00146       {
00147       arma_debug_assert_same_size(tmp_mat, *(ptrs[i]), "matrix addition");
00148       }
00149   
00150   
00151     // no aliasing problem
00152     out.set_size(n_rows, n_cols);
00153     
00154     const u32 n_elem = ptrs[0]->n_elem;
00155     
00156     for(u32 j=0; j<n_elem; ++j)
00157       {
00158       eT acc = ptrs[0]->mem[j];
00159     
00160       for(u32 i=1; i < N_mat; ++i)
00161         {
00162         acc += ptrs[i]->mem[j];
00163         }
00164     
00165       out[j] = acc;
00166       }
00167     
00168     
00169     for(u32 i=0; i<N_mat; ++i)
00170       {
00171       if(del[i] == true)
00172         {
00173         arma_extra_debug_print( arma_boost::format("delete mat_ptr[%d]") % i );
00174         delete ptrs[i];
00175         }
00176       }
00177     }
00178   }
00179 
00180 
00181 
00182 // possible aliasing cases:
00183 // Q = Q + Q.row(0)  -> no problem  (aliasing has no effect or incompatible matrix dimensions).
00184 //                      however, the only time the above will work is when Q has the same dimensions as Q.row(0),
00185 //                      meaning that doing this addition operation is pretty silly
00186 // Q = Q + R.row(0)  -> no problem
00187 // Q = R + Q.row(0)  -> output Q is set to size of R, which may destroy input Q
00188 //
00189 // strategy:
00190 // if the matrix from the second argument is an alias of the output matrix,
00191 // make a proper matrix out of the second argument
00192 
00193 template<typename eT>
00194 inline
00195 void
00196 glue_plus::apply(Mat<eT>& out, const Glue<Mat<eT>, subview<eT>, glue_plus>& X)
00197   {
00198   arma_extra_debug_sigprint();
00199   
00200   const Mat<eT>& orig_A = X.A;
00201   const Mat<eT>& orig_B = X.B.m;
00202   
00203   if( &out != &orig_B )
00204     {
00205     //const u32 sub_B_n_rows = X.B.n_rows;
00206     //const u32 sub_B_n_cols = X.B.n_cols;
00207     
00208     arma_debug_assert_same_size(X.A, X.B, "matrix addition");
00209       
00210     
00211     out.set_size(orig_A.n_rows, orig_A.n_cols);
00212     
00213     for(u32 col = 0; col<orig_A.n_cols; ++col)
00214       {
00215       const u32 B_col_mod = X.B.aux_col1 + col;
00216       
00217       for(u32 row = 0; row<orig_A.n_rows; ++row)
00218         {
00219         const u32 B_row_mod = X.B.aux_row1 + row;
00220         
00221         out.at(row,col) =  orig_A.at(row, col) + orig_B.at(B_row_mod, B_col_mod);
00222         }
00223       
00224       }
00225     
00226     }
00227   else
00228     {
00229     const Mat<eT> processed_B(X.B);  // create a matrix out of subview
00230     glue_plus::apply(out, orig_A, processed_B);
00231     }
00232      
00233   }
00234 
00235 
00236 // possible aliasing cases:
00237 // Q = Q.row(0) + Q  -> no problem (aliasing has no effect or incompatible matrix dimensions)
00238 // Q = Q.row(0) + R  -> problem (output Q is set to size of Q.row(0) which may destroy input Q)
00239 // Q = R.row(0) + Q  -> problem (output Q is set to size of R.row(0) which may destroy input Q)
00240 
00241 template<typename eT>
00242 inline
00243 void
00244 glue_plus::apply(Mat<eT>& out, const Glue< subview<eT>, Mat<eT>, glue_plus>& X)
00245   {
00246   arma_extra_debug_sigprint();
00247   
00248   const Mat<eT>& orig_A = X.A.m;
00249   
00250   const unwrap_check< Mat<eT> > tmp(X.B, out);
00251   const Mat<eT>& orig_B = tmp.M;
00252   
00253   if( &out != &orig_A )
00254     {
00255     const u32 sub_A_n_rows = X.A.n_rows;
00256     const u32 sub_A_n_cols = X.A.n_cols;
00257     
00258     arma_debug_assert_same_size(X.A, X.B, "matrix addition");
00259       
00260     out.set_size(sub_A_n_rows, sub_A_n_cols);
00261     
00262     for(u32 col = 0; col<sub_A_n_cols; ++col)
00263       {
00264       const u32 A_col_mod = X.A.aux_col1 + col;
00265       
00266       for(u32 row = 0; row<sub_A_n_rows; ++row)
00267         {
00268         const u32 A_row_mod = X.A.aux_row1 + row;
00269         
00270         out.at(row,col) =  orig_A.at(A_row_mod, A_col_mod) + orig_B.at(row, col);
00271         }
00272       
00273       }
00274     }
00275   else
00276     {
00277     const Mat<eT> processed_A(X.A);
00278     glue_plus::apply(out, processed_A, orig_B);
00279     }
00280   
00281   
00282   }
00283 
00284 
00285 // possible aliasing cases:
00286 // Q = Q.row(0) + Q.row(0)  -> input Q is destroyed unless Q.row(0) has the same size as Q 
00287 // Q = Q.row(0) + R.row(0)  -> input Q is destroyed unless Q.row(0) has the same size as Q 
00288 // Q = R.row(0) + Q.row(0)  -> input Q is destroyed
00289 
00290 template<typename eT>
00291 inline
00292 void
00293 glue_plus::apply(Mat<eT>& out, const Glue< subview<eT>, subview<eT>, glue_plus>& X)
00294   {
00295   arma_extra_debug_sigprint();
00296   
00297   const Mat<eT>& orig_A = X.A.m;
00298   const Mat<eT>& orig_B = X.B.m;
00299   
00300   if( (&out != &orig_A) && (&out != &orig_B) )
00301     {
00302     const u32 sub_A_n_rows = X.A.n_rows;
00303     const u32 sub_A_n_cols = X.A.n_cols;
00304     
00305     //const u32 sub_B_n_rows = X.B.n_rows;
00306     //const u32 sub_B_n_cols = X.B.n_cols;
00307     
00308     arma_debug_assert_same_size(X.A, X.B, "matrix addition");
00309       
00310     out.set_size(sub_A_n_rows, sub_A_n_cols);
00311     
00312     for(u32 col = 0; col<sub_A_n_cols; ++col)
00313       {
00314       const u32 A_col_mod = X.A.aux_col1 + col;
00315       const u32 B_col_mod = X.B.aux_col1 + col;
00316       
00317       for(u32 row = 0; row<sub_A_n_rows; ++row)
00318         {
00319         const u32 A_row_mod = X.A.aux_row1 + row;
00320         const u32 B_row_mod = X.B.aux_row1 + row;
00321         
00322         out.at(row,col) =  orig_A.at(A_row_mod, A_col_mod) + orig_B.at(B_row_mod, B_col_mod);
00323         }
00324       
00325       }
00326     }
00327   else
00328     {
00329     const Mat<eT> processed_A(X.A);
00330     const Mat<eT> processed_B(X.B);
00331     
00332     glue_plus::apply(out, processed_A, processed_B);
00333     }
00334   }
00335 
00336 
00337 
00338 template<typename eT>
00339 inline void glue_plus::apply_inplace(Mat<eT>& out, const Mat<eT>& B)
00340   {
00341   arma_extra_debug_sigprint();
00342   
00343   arma_debug_assert_same_size(out, B, "matrix addition");
00344   
00345   
00346         eT* out_mem = out.memptr();
00347   const eT* B_mem   = B.mem;
00348   
00349   const u32 n_elem  = B.n_elem;
00350   
00351   for(u32 i=0; i<n_elem; ++i)
00352     {
00353     out_mem[i] += B_mem[i];
00354     }
00355   
00356   }
00357 
00358 
00359 
00360 template<typename T1, typename op_type>
00361 inline
00362 void
00363 glue_plus::apply_inplace(Mat<typename T1::elem_type>& out, const Op<T1, op_type>& X)
00364   {
00365   arma_extra_debug_sigprint();
00366   
00367   typedef typename T1::elem_type eT;
00368   
00369   const Mat<eT> tmp(X);
00370   glue_plus::apply(out, out, tmp);
00371   }
00372   
00373 
00374 
00375 template<typename T1>
00376 inline
00377 void
00378 glue_plus::apply_inplace(Mat<typename T1::elem_type>& out, const Op<T1, op_square>& X)
00379   {
00380   arma_extra_debug_sigprint();
00381   
00382   typedef typename T1::elem_type eT;
00383   
00384   const unwrap<T1> tmp(X.m);
00385   const Mat<eT>& B = tmp.M;
00386   
00387   arma_debug_assert_same_size(out, B, "matrix addition");
00388     
00389         eT* out_mem = out.memptr();
00390   const eT* B_mem   = B.mem;
00391   
00392   const u32 n_elem  = out.n_elem;
00393   
00394   for(u32 i=0; i<n_elem; ++i)
00395     {
00396     const eT tmp_val = B_mem[i];
00397     out_mem[i] += tmp_val*tmp_val;
00398     }
00399   }
00400 
00401 
00402 
00403 template<typename T1, typename T2, typename glue_type>
00404 inline
00405 void
00406 glue_plus::apply_inplace(Mat<typename T1::elem_type>& out, const Glue<T1, T2, glue_type>& X)
00407   {
00408   arma_extra_debug_sigprint();
00409     
00410   out = X + out;
00411   }
00412 
00413 
00414 
00415 template<typename T1, typename T2>
00416 inline
00417 void
00418 glue_plus::apply_inplace(Mat<typename T1::elem_type>& out, const Glue<T1, T2, glue_times>& X)
00419   {
00420   arma_extra_debug_sigprint();
00421   
00422   typedef typename T1::elem_type eT;
00423   
00424   const unwrap_check<T1> tmp1(X.A, out);
00425   const unwrap_check<T2> tmp2(X.B, out);
00426   
00427   const Mat<eT>& A = tmp1.M;
00428   const Mat<eT>& B = tmp2.M;
00429   
00430   arma_debug_assert_mul_size(A, B, "matrix multiplication");
00431   arma_debug_assert_same_size(out.n_rows, out.n_cols, A.n_rows, B.n_cols, "matrix addition");
00432   
00433   gemm<false,false,false,true>::apply(out, A, B, eT(1), eT(1));
00434   }
00435 
00436 
00437 
00438 //
00439 //
00440 //
00441 
00442 
00443 
00444 template<typename T1, typename T2>
00445 inline
00446 void
00447 glue_plus::apply
00448   (
00449   Mat<typename T1::elem_type>& out,
00450   const Glue< Glue< T1, Col<typename T1::elem_type>, glue_times_vec>, T2, glue_plus>& in
00451   )
00452   {
00453   arma_extra_debug_sigprint();
00454   
00455   typedef typename T1::elem_type eT;
00456   
00457   const unwrap<T1> tmp1(in.A.A);
00458   const unwrap<T2> tmp2(in.B);
00459   
00460   const Mat<eT>& A = tmp1.M;
00461   const Col<eT>& B = in.A.B;
00462   const Mat<eT>& C = tmp2.M;
00463   
00464   arma_debug_assert_mul_size(A, B, "matrix multiplication");
00465   arma_debug_assert_same_size(A.n_rows, B.n_cols, C.n_rows, C.n_cols, "matrix addition");
00466   
00467   if( (&out != &A) && (&out != &B) )
00468     {
00469     out = C;
00470     gemv<false,false,true>::apply(out.memptr(), A, B.mem, eT(1), eT(1));
00471     }
00472   else
00473     {
00474     const unwrap_check< Mat<eT> > tmpA(A,out);
00475     const unwrap_check< Col<eT> > tmpB(B,out);
00476     
00477     const Mat<eT>& A_safe = tmpA.M;
00478     const Col<eT>& B_safe = tmpB.M;
00479     
00480     out = C;
00481     gemv<false,false,true>::apply(out.memptr(), A_safe, B_safe.mem, eT(1), eT(1));
00482     }
00483   
00484   }
00485 
00486 
00487 
00488 template<typename T1, typename T2>
00489 inline
00490 void
00491 glue_plus::apply
00492   (
00493   Mat<typename T1::elem_type>& out,
00494   const Glue< Glue< Row<typename T1::elem_type>, T1, glue_times_vec>, T2, glue_plus>& in
00495   )
00496   {
00497   arma_extra_debug_sigprint();
00498   
00499   typedef typename T1::elem_type eT;
00500   
00501   const unwrap<T1> tmp1(in.A.B);
00502   const unwrap<T2> tmp2(in.B);
00503   
00504   const Row<eT>& A = in.A.A;
00505   const Mat<eT>& B = tmp1.M;
00506   const Mat<eT>& C = tmp2.M;
00507   
00508   arma_debug_assert_mul_size(A, B, "matrix multiplication");
00509   arma_debug_assert_same_size(A.n_rows, B.n_cols, C.n_rows, C.n_cols, "matrix addition");
00510   
00511   if( (&out != &A) && (&out != &B) )
00512     {
00513     out = C;
00514     gemv<true,false,true>::apply(out.memptr(), B, A.mem, eT(1), eT(1));
00515     }
00516   else
00517     {
00518     const unwrap_check< Mat<eT> > tmpA(A,out);
00519     const unwrap_check< Mat<eT> > tmpB(B,out);
00520     
00521     const Mat<eT>& A_safe = tmpA.M;
00522     const Mat<eT>& B_safe = tmpB.M;
00523     
00524     out = C;
00525     gemv<true,false,true>::apply(out.memptr(), B_safe, A_safe.mem, eT(1), eT(1));
00526     }
00527   
00528   }
00529 
00530 
00531 
00532 template<typename T1, typename T2>
00533 inline
00534 void
00535 glue_plus::apply
00536   (
00537   Mat<typename T1::elem_type>& out,
00538   const Glue<Op<T1, op_scalar_times>, Op<T2, op_scalar_times>, glue_plus>& in
00539   )
00540   {
00541   arma_extra_debug_sigprint();
00542   
00543   typedef typename T1::elem_type eT;
00544   
00545   const unwrap<T1> tmp1(in.A.m);
00546   const unwrap<T2> tmp2(in.B.m);
00547   
00548   const Mat<eT>& A = tmp1.M;
00549   const Mat<eT>& B = tmp2.M;
00550   
00551   arma_debug_assert_same_size(A, B, "matrix addition");
00552   
00553   out.set_size(A.n_rows, A.n_cols);
00554   
00555   const eT k1 = in.A.aux;
00556   const eT k2 = in.B.aux;
00557   
00558         eT* out_mem = out.memptr();
00559   const eT* A_mem   = A.mem;
00560   const eT* B_mem   = B.mem;
00561   
00562   const u32 local_n_elem = A.n_elem;
00563   
00564   for(u32 i=0; i<local_n_elem; ++i)
00565     {
00566     out_mem[i] = k1*A_mem[i] + k2*B_mem[i];
00567     }
00568   
00569   }
00570 
00571 
00572 
00573 template<typename T1, typename T2, typename T3>
00574 inline
00575 void
00576 glue_plus::apply
00577   (
00578   Mat<typename T1::elem_type>& out,
00579   const Glue< Glue<Op<T1, op_scalar_times>, Op<T2, op_scalar_times>, glue_plus>, Op<T3, op_scalar_times>, glue_plus>& in
00580   )
00581   {
00582   arma_extra_debug_sigprint();
00583   
00584   typedef typename T1::elem_type eT;
00585   
00586   const unwrap<T1> tmp1(in.A.A.m);
00587   const unwrap<T2> tmp2(in.A.B.m);
00588   const unwrap<T3> tmp3(in.B.m);
00589   
00590   const Mat<eT>& A = tmp1.M;
00591   const Mat<eT>& B = tmp2.M;
00592   const Mat<eT>& C = tmp3.M;
00593   
00594   arma_debug_assert_same_size(A, B, "matrix addition");
00595   arma_debug_assert_same_size(B, C, "matrix addition");
00596   
00597   out.set_size(A.n_rows, A.n_cols);
00598   
00599   const eT k1 = in.A.A.aux;
00600   const eT k2 = in.A.B.aux;
00601   const eT k3 = in.B.aux;
00602   
00603         eT* out_mem = out.memptr();
00604   const eT* A_mem   = A.mem;
00605   const eT* B_mem   = B.mem;
00606   const eT* C_mem   = C.mem;
00607   
00608   const u32 local_n_elem = A.n_elem;
00609   
00610   for(u32 i=0; i<local_n_elem; ++i)
00611     {
00612     out_mem[i] = k1*A_mem[i] + k2*B_mem[i] + k3*C_mem[i];
00613     }
00614   
00615   }
00616 
00617 
00618 
00619 template<typename T1, typename T2>
00620 inline
00621 void
00622 glue_plus::apply
00623   (
00624   Mat<typename T1::elem_type>& out,
00625   const Glue<Op<T1, op_scalar_div_pre>, Op<T2, op_scalar_div_pre>, glue_plus>& in
00626   )
00627   {
00628   arma_extra_debug_sigprint();
00629   
00630   typedef typename T1::elem_type eT;
00631   
00632   const unwrap<T1> tmp1(in.A.m);
00633   const unwrap<T2> tmp2(in.B.m);
00634   
00635   const Mat<eT>& A = tmp1.M;
00636   const Mat<eT>& B = tmp2.M;
00637   
00638   arma_debug_assert_same_size(A, B, "matrix addition");
00639   
00640   out.set_size(A.n_rows, A.n_cols);
00641   
00642   const eT k1 = in.A.aux;
00643   const eT k2 = in.B.aux;
00644   
00645         eT* out_mem = out.memptr();
00646   const eT* A_mem   = A.mem;
00647   const eT* B_mem   = B.mem;
00648   
00649   const u32 local_n_elem = A.n_elem;
00650   
00651   for(u32 i=0; i<local_n_elem; ++i)
00652     {
00653     out_mem[i] = k1/A_mem[i] + k2/B_mem[i];
00654     }
00655   
00656   }
00657 
00658 
00659 
00660 template<typename T1, typename T2, typename T3>
00661 inline
00662 void
00663 glue_plus::apply
00664   (
00665   Mat<typename T1::elem_type>& out,
00666   const Glue< Glue<Op<T1, op_scalar_div_pre>, Op<T2, op_scalar_div_pre>, glue_plus>, Op<T3, op_scalar_div_pre>, glue_plus>& in
00667   )
00668   {
00669   arma_extra_debug_sigprint();
00670   
00671   typedef typename T1::elem_type eT;
00672   
00673   const unwrap<T1> tmp1(in.A.A.m);
00674   const unwrap<T2> tmp2(in.A.B.m);
00675   const unwrap<T3> tmp3(in.B.m);
00676   
00677   const Mat<eT>& A = tmp1.M;
00678   const Mat<eT>& B = tmp2.M;
00679   const Mat<eT>& C = tmp3.M;
00680   
00681   arma_debug_assert_same_size(A, B, "matrix addition");
00682   arma_debug_assert_same_size(B, C, "matrix addition");
00683   
00684   out.set_size(A.n_rows, A.n_cols);
00685   
00686   const eT k1 = in.A.A.aux;
00687   const eT k2 = in.A.B.aux;
00688   const eT k3 = in.B.aux;
00689   
00690         eT* out_mem = out.memptr();
00691   const eT* A_mem   = A.mem;
00692   const eT* B_mem   = B.mem;
00693   const eT* C_mem   = C.mem;
00694   
00695   const u32 local_n_elem = A.n_elem;
00696   
00697   for(u32 i=0; i<local_n_elem; ++i)
00698     {
00699     out_mem[i] = k1/A_mem[i] + k2/B_mem[i] + k3/C_mem[i];
00700     }
00701   
00702   }
00703 
00704 
00705 
00706 template<typename T1, typename T2>
00707 inline
00708 void
00709 glue_plus::apply
00710   (
00711   Mat<typename T1::elem_type>& out,
00712   const Glue<Op<T1, op_scalar_div_post>, Op<T2, op_scalar_div_post>, glue_plus>& in
00713   )
00714   {
00715   arma_extra_debug_sigprint();
00716   
00717   typedef typename T1::elem_type eT;
00718   
00719   const unwrap<T1> tmp1(in.A.m);
00720   const unwrap<T2> tmp2(in.B.m);
00721   
00722   const Mat<eT>& A = tmp1.M;
00723   const Mat<eT>& B = tmp2.M;
00724   
00725   arma_debug_assert_same_size(A, B, "matrix addition");
00726   
00727   out.set_size(A.n_rows, A.n_cols);
00728   
00729   const eT k1 = in.A.aux;
00730   const eT k2 = in.B.aux;
00731   
00732         eT* out_mem = out.memptr();
00733   const eT* A_mem   = A.mem;
00734   const eT* B_mem   = B.mem;
00735   
00736   const u32 local_n_elem = A.n_elem;
00737   
00738   for(u32 i=0; i<local_n_elem; ++i)
00739     {
00740     out_mem[i] = A_mem[i]/k1 + B_mem[i]/k2;
00741     }
00742   
00743   }
00744 
00745 
00746 
00747 template<typename T1, typename T2, typename T3>
00748 inline
00749 void
00750 glue_plus::apply
00751   (
00752   Mat<typename T1::elem_type>& out,
00753   const Glue< Glue<Op<T1, op_scalar_div_post>, Op<T2, op_scalar_div_post>, glue_plus>, Op<T3, op_scalar_div_post>, glue_plus>& in
00754   )
00755   {
00756   arma_extra_debug_sigprint();
00757   
00758   typedef typename T1::elem_type eT;
00759   
00760   const unwrap<T1> tmp1(in.A.A.m);
00761   const unwrap<T2> tmp2(in.A.B.m);
00762   const unwrap<T3> tmp3(in.B.m);
00763   
00764   const Mat<eT>& A = tmp1.M;
00765   const Mat<eT>& B = tmp2.M;
00766   const Mat<eT>& C = tmp3.M;
00767   
00768   arma_debug_assert_same_size(A, B, "matrix addition");
00769   arma_debug_assert_same_size(B, C, "matrix addition");
00770   
00771   out.set_size(A.n_rows, A.n_cols);
00772   
00773   const eT k1 = in.A.A.aux;
00774   const eT k2 = in.A.B.aux;
00775   const eT k3 = in.B.aux;
00776   
00777         eT* out_mem = out.memptr();
00778   const eT* A_mem   = A.mem;
00779   const eT* B_mem   = B.mem;
00780   const eT* C_mem   = C.mem;
00781   
00782   const u32 local_n_elem = A.n_elem;
00783   
00784   for(u32 i=0; i<local_n_elem; ++i)
00785     {
00786     out_mem[i] = A_mem[i]/k1 + B_mem[i]/k2 + C_mem[i]/k3;
00787     }
00788   
00789   }
00790 
00791 
00792 
00793 //
00794 // matrix addition with different element types
00795 
00796 template<typename eT1, typename eT2>
00797 inline
00798 void
00799 glue_plus::apply_mixed(Mat<typename promote_type<eT1,eT2>::result>& out, const Mat<eT1>& X, const Mat<eT2>& Y)
00800   {
00801   arma_extra_debug_sigprint();
00802   
00803   typedef typename promote_type<eT1,eT2>::result out_eT;
00804   
00805   arma_debug_assert_same_size(X,Y, "matrix addition");  
00806   
00807   out.set_size(X.n_rows, X.n_cols);
00808   
00809         out_eT* out_mem = out.memptr();
00810   const eT1*    X_mem   = X.mem;
00811   const eT2*    Y_mem   = Y.mem;
00812   
00813   const u32 n_elem = out.n_elem;
00814   
00815   for(u32 i=0; i<n_elem; ++i)
00816     {
00817     out_mem[i] = upgrade_val<eT1,eT2>::apply(X_mem[i]) + upgrade_val<eT1,eT2>::apply(Y_mem[i]);
00818     }
00819   }
00820 
00821 
00822 
00823 //
00824 // glue_plus_diag
00825 
00826 
00827 template<typename T1, typename T2>
00828 inline
00829 void
00830 glue_plus_diag::apply(Mat<typename T1::elem_type>& out, const T1& A_orig, const Op<T2,op_diagmat>& B_orig)
00831   {
00832   arma_extra_debug_sigprint();
00833   
00834   isnt_same_type<typename T1::elem_type, typename T2::elem_type>::check();
00835   
00836   typedef typename T1::elem_type eT;
00837   
00838   const unwrap<T1> tmp1(A_orig);
00839   const unwrap<T2> tmp2(B_orig.m);
00840   
00841   const Mat<eT>& A = tmp1.M;  
00842   const Mat<eT>& B = tmp2.M;
00843   
00844   arma_debug_check( !B.is_square(), "glue_plus_diag::apply(): matrices must be square" );
00845   arma_debug_assert_same_size(A, B, "matrix addition");
00846 
00847   
00848   // no aliasing problem
00849   out.set_size(A.n_rows, A.n_cols);
00850   
00851   for(u32 col=0; col<A.n_cols; ++col)
00852     {
00853     for(u32 row=0; row<A.n_rows; ++row)
00854       {
00855       if(col != row)
00856         {
00857         out.at(row,col) = A.at(row,col);
00858         }
00859       else
00860         {
00861         out.at(row,col) = A.at(row,col) + B.at(row,col);
00862         }
00863       }
00864     }
00865   
00866   }
00867 
00868 
00869 
00870 template<typename T1, typename T2>
00871 inline
00872 void
00873 glue_plus_diag::apply(Mat<typename T1::elem_type>& out, const Op<T1,op_diagmat>& A_orig, const Op<T2,op_diagmat>& B_orig)
00874   {
00875   arma_extra_debug_sigprint();
00876   
00877   isnt_same_type<typename T1::elem_type, typename T2::elem_type>::check();
00878   
00879   const unwrap<T1> tmp1(A_orig.m);
00880   const unwrap<T2> tmp2(B_orig.m);
00881   
00882   typedef typename T1::elem_type eT;
00883   
00884   const Mat<eT>& A = tmp1.M;
00885   const Mat<eT>& B = tmp2.M;
00886   
00887   arma_debug_check( !A.is_square(), "glue_plus_diag::apply(): matrices must be square" );
00888   arma_debug_assert_same_size(A, B, "matrix addition");
00889   
00890   
00891   if( (&out != &A) && (&out != &B) )
00892     {
00893     out.zeros(A.n_rows, A.n_cols);
00894     
00895     for(u32 i=0; i<A.n_rows; ++i)
00896       {
00897       out.at(i,i) = A.at(i,i) + B.at(i,i);
00898       }
00899     }
00900   else
00901     {
00902     out.set_size(A.n_rows, A.n_cols);
00903   
00904     for(u32 col=0; col<A.n_cols; ++col)
00905       {
00906       for(u32 row=0; row<A.n_rows; ++row)
00907         {
00908         if(col != row)
00909           {
00910           out.at(row,col) = 0.0;
00911           }
00912         else
00913           {
00914           out.at(row,col) = A.at(row,col) + B.at(row,col);
00915           }
00916         }
00917       }
00918     }
00919   
00920   }
00921 
00922 
00923 
00924 template<typename T1, typename T2>
00925 inline
00926 void
00927 glue_plus_diag::apply(Mat<typename T1::elem_type>& out, const Glue<T1, Op<T2,op_diagmat>, glue_plus_diag>& X)
00928   {
00929   glue_plus_diag::apply(out, X.A, X.B);
00930   }
00931 
00932 
00933 
00934 template<typename T1, typename T2>
00935 inline
00936 void
00937 glue_plus_diag::apply(Mat<typename T1::elem_type>& out, const Glue<Op<T1,op_diagmat>, T2, glue_plus_diag>& X)
00938   {
00939   glue_plus_diag::apply(out, X.B, X.A);  // NOTE: arguments are swapped
00940   }
00941 
00942 
00943 
00944 template<typename T1, typename T2>
00945 inline
00946 void
00947 glue_plus_diag::apply(Mat<typename T1::elem_type>& out, const Glue<Op<T1,op_diagmat>, Op<T2,op_diagmat>, glue_plus_diag>& X)
00948   {
00949   glue_plus_diag::apply(out, X.A, X.B);
00950   }
00951 
00952 
00953 
00954 //! @}