ergo
template_lapack_lasv2.h
Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_LASV2_HEADER
00036 #define TEMPLATE_LAPACK_LASV2_HEADER
00037 
00038 
00039 template<class Treal>
00040 int template_lapack_lasv2(const Treal *f, const Treal *g, const Treal *h__, 
00041         Treal *ssmin, Treal *ssmax, Treal *snr, Treal *
00042         csr, Treal *snl, Treal *csl)
00043 {
00044 /*  -- LAPACK auxiliary routine (version 3.0) --   
00045        Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
00046        Courant Institute, Argonne National Lab, and Rice University   
00047        October 31, 1992   
00048 
00049 
00050     Purpose   
00051     =======   
00052 
00053     DLASV2 computes the singular value decomposition of a 2-by-2   
00054     triangular matrix   
00055        [  F   G  ]   
00056        [  0   H  ].   
00057     On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the   
00058     smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and   
00059     right singular vectors for abs(SSMAX), giving the decomposition   
00060 
00061        [ CSL  SNL ] [  F   G  ] [ CSR -SNR ]  =  [ SSMAX   0   ]   
00062        [-SNL  CSL ] [  0   H  ] [ SNR  CSR ]     [  0    SSMIN ].   
00063 
00064     Arguments   
00065     =========   
00066 
00067     F       (input) DOUBLE PRECISION   
00068             The (1,1) element of the 2-by-2 matrix.   
00069 
00070     G       (input) DOUBLE PRECISION   
00071             The (1,2) element of the 2-by-2 matrix.   
00072 
00073     H       (input) DOUBLE PRECISION   
00074             The (2,2) element of the 2-by-2 matrix.   
00075 
00076     SSMIN   (output) DOUBLE PRECISION   
00077             abs(SSMIN) is the smaller singular value.   
00078 
00079     SSMAX   (output) DOUBLE PRECISION   
00080             abs(SSMAX) is the larger singular value.   
00081 
00082     SNL     (output) DOUBLE PRECISION   
00083     CSL     (output) DOUBLE PRECISION   
00084             The vector (CSL, SNL) is a unit left singular vector for the   
00085             singular value abs(SSMAX).   
00086 
00087     SNR     (output) DOUBLE PRECISION   
00088     CSR     (output) DOUBLE PRECISION   
00089             The vector (CSR, SNR) is a unit right singular vector for the   
00090             singular value abs(SSMAX).   
00091 
00092     Further Details   
00093     ===============   
00094 
00095     Any input parameter may be aliased with any output parameter.   
00096 
00097     Barring over/underflow and assuming a guard digit in subtraction, all   
00098     output quantities are correct to within a few units in the last   
00099     place (ulps).   
00100 
00101     In IEEE arithmetic, the code works correctly if one matrix element is   
00102     infinite.   
00103 
00104     Overflow will not occur unless the largest singular value itself   
00105     overflows or is within a few ulps of overflow. (On machines with   
00106     partial overflow, like the Cray, overflow may occur if the largest   
00107     singular value is within a factor of 2 of overflow.)   
00108 
00109     Underflow is harmless if underflow is gradual. Otherwise, results   
00110     may correspond to a matrix modified by perturbations of size near   
00111     the underflow threshold.   
00112 
00113    ===================================================================== */
00114     /* Table of constant values */
00115      Treal c_b3 = 2.;
00116      Treal c_b4 = 1.;
00117     
00118     /* System generated locals */
00119     Treal d__1;
00120     /* Local variables */
00121      integer pmax;
00122      Treal temp;
00123      logical swap;
00124      Treal a, d__, l, m, r__, s, t, tsign, fa, ga, ha;
00125      Treal ft, gt, ht, mm;
00126      logical gasmal;
00127      Treal tt, clt, crt, slt, srt;
00128 
00129      /* Initialization added by Elias to get rid of compiler warnings. */
00130      tsign = 0;
00131 
00132 
00133     ft = *f;
00134     fa = absMACRO(ft);
00135     ht = *h__;
00136     ha = absMACRO(*h__);
00137 
00138 /*     PMAX points to the maximum absolute element of matrix   
00139          PMAX = 1 if F largest in absolute values   
00140          PMAX = 2 if G largest in absolute values   
00141          PMAX = 3 if H largest in absolute values */
00142 
00143     pmax = 1;
00144     swap = ha > fa;
00145     if (swap) {
00146         pmax = 3;
00147         temp = ft;
00148         ft = ht;
00149         ht = temp;
00150         temp = fa;
00151         fa = ha;
00152         ha = temp;
00153 
00154 /*        Now FA .ge. HA */
00155 
00156     }
00157     gt = *g;
00158     ga = absMACRO(gt);
00159     if (ga == 0.) {
00160 
00161 /*        Diagonal matrix */
00162 
00163         *ssmin = ha;
00164         *ssmax = fa;
00165         clt = 1.;
00166         crt = 1.;
00167         slt = 0.;
00168         srt = 0.;
00169     } else {
00170         gasmal = TRUE_;
00171         if (ga > fa) {
00172             pmax = 2;
00173             if (fa / ga < template_lapack_lamch("EPS", (Treal)0)) {
00174 
00175 /*              Case of very large GA */
00176 
00177                 gasmal = FALSE_;
00178                 *ssmax = ga;
00179                 if (ha > 1.) {
00180                     *ssmin = fa / (ga / ha);
00181                 } else {
00182                     *ssmin = fa / ga * ha;
00183                 }
00184                 clt = 1.;
00185                 slt = ht / gt;
00186                 srt = 1.;
00187                 crt = ft / gt;
00188             }
00189         }
00190         if (gasmal) {
00191 
00192 /*           Normal case */
00193 
00194             d__ = fa - ha;
00195             if (d__ == fa) {
00196 
00197 /*              Copes with infinite F or H */
00198 
00199                 l = 1.;
00200             } else {
00201                 l = d__ / fa;
00202             }
00203 
00204 /*           Note that 0 .le. L .le. 1 */
00205 
00206             m = gt / ft;
00207 
00208 /*           Note that abs(M) .le. 1/macheps */
00209 
00210             t = 2. - l;
00211 
00212 /*           Note that T .ge. 1 */
00213 
00214             mm = m * m;
00215             tt = t * t;
00216             s = template_blas_sqrt(tt + mm);
00217 
00218 /*           Note that 1 .le. S .le. 1 + 1/macheps */
00219 
00220             if (l == 0.) {
00221                 r__ = absMACRO(m);
00222             } else {
00223                 r__ = template_blas_sqrt(l * l + mm);
00224             }
00225 
00226 /*           Note that 0 .le. R .le. 1 + 1/macheps */
00227 
00228             a = (s + r__) * .5;
00229 
00230 /*           Note that 1 .le. A .le. 1 + abs(M) */
00231 
00232             *ssmin = ha / a;
00233             *ssmax = fa * a;
00234             if (mm == 0.) {
00235 
00236 /*              Note that M is very tiny */
00237 
00238                 if (l == 0.) {
00239                     t = template_lapack_d_sign(&c_b3, &ft) * template_lapack_d_sign(&c_b4, &gt);
00240                 } else {
00241                     t = gt / template_lapack_d_sign(&d__, &ft) + m / t;
00242                 }
00243             } else {
00244                 t = (m / (s + t) + m / (r__ + l)) * (a + 1.);
00245             }
00246             l = template_blas_sqrt(t * t + 4.);
00247             crt = 2. / l;
00248             srt = t / l;
00249             clt = (crt + srt * m) / a;
00250             slt = ht / ft * srt / a;
00251         }
00252     }
00253     if (swap) {
00254         *csl = srt;
00255         *snl = crt;
00256         *csr = slt;
00257         *snr = clt;
00258     } else {
00259         *csl = clt;
00260         *snl = slt;
00261         *csr = crt;
00262         *snr = srt;
00263     }
00264 
00265 /*     Correct signs of SSMAX and SSMIN */
00266 
00267     if (pmax == 1) {
00268         tsign = template_lapack_d_sign(&c_b4, csr) * template_lapack_d_sign(&c_b4, csl) * template_lapack_d_sign(&c_b4, f);
00269     }
00270     if (pmax == 2) {
00271         tsign = template_lapack_d_sign(&c_b4, snr) * template_lapack_d_sign(&c_b4, csl) * template_lapack_d_sign(&c_b4, g);
00272     }
00273     if (pmax == 3) {
00274         tsign = template_lapack_d_sign(&c_b4, snr) * template_lapack_d_sign(&c_b4, snl) * template_lapack_d_sign(&c_b4, h__);
00275     }
00276     *ssmax = template_lapack_d_sign(ssmax, &tsign);
00277     d__1 = tsign * template_lapack_d_sign(&c_b4, f) * template_lapack_d_sign(&c_b4, h__);
00278     *ssmin = template_lapack_d_sign(ssmin, &d__1);
00279     return 0;
00280 
00281 /*     End of DLASV2 */
00282 
00283 } /* dlasv2_ */
00284 
00285 #endif