ergo
template_lapack_lar1v.h
Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_LAR1V_HEADER
00036 #define TEMPLATE_LAPACK_LAR1V_HEADER
00037 
00038 template<class Treal>
00039 int template_lapack_lar1v(integer *n, integer *b1, integer *bn, Treal 
00040         *lambda, Treal *d__, Treal *l, Treal *ld, Treal *
00041         lld, Treal *pivmin, Treal *gaptol, Treal *z__, logical 
00042         *wantnc, integer *negcnt, Treal *ztz, Treal *mingma, 
00043         integer *r__, integer *isuppz, Treal *nrminv, Treal *resid, 
00044         Treal *rqcorr, Treal *work)
00045 {
00046     /* System generated locals */
00047     integer i__1;
00048     Treal d__1, d__2, d__3;
00049 
00050 
00051     /* Local variables */
00052     integer i__;
00053     Treal s;
00054     integer r1, r2;
00055     Treal eps, tmp;
00056     integer neg1, neg2, indp, inds;
00057     Treal dplus;
00058     integer indlpl, indumn;
00059     Treal dminus;
00060     logical sawnan1, sawnan2;
00061 
00062 
00063 /*  -- LAPACK auxiliary routine (version 3.2) -- */
00064 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
00065 /*     November 2006 */
00066 
00067 /*     .. Scalar Arguments .. */
00068 /*     .. */
00069 /*     .. Array Arguments .. */
00070 /*     .. */
00071 
00072 /*  Purpose */
00073 /*  ======= */
00074 
00075 /*  DLAR1V computes the (scaled) r-th column of the inverse of */
00076 /*  the sumbmatrix in rows B1 through BN of the tridiagonal matrix */
00077 /*  L D L^T - sigma I. When sigma is close to an eigenvalue, the */
00078 /*  computed vector is an accurate eigenvector. Usually, r corresponds */
00079 /*  to the index where the eigenvector is largest in magnitude. */
00080 /*  The following steps accomplish this computation : */
00081 /*  (a) Stationary qd transform,  L D L^T - sigma I = L(+) D(+) L(+)^T, */
00082 /*  (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T, */
00083 /*  (c) Computation of the diagonal elements of the inverse of */
00084 /*      L D L^T - sigma I by combining the above transforms, and choosing */
00085 /*      r as the index where the diagonal of the inverse is (one of the) */
00086 /*      largest in magnitude. */
00087 /*  (d) Computation of the (scaled) r-th column of the inverse using the */
00088 /*      twisted factorization obtained by combining the top part of the */
00089 /*      the stationary and the bottom part of the progressive transform. */
00090 
00091 /*  Arguments */
00092 /*  ========= */
00093 
00094 /*  N        (input) INTEGER */
00095 /*           The order of the matrix L D L^T. */
00096 
00097 /*  B1       (input) INTEGER */
00098 /*           First index of the submatrix of L D L^T. */
00099 
00100 /*  BN       (input) INTEGER */
00101 /*           Last index of the submatrix of L D L^T. */
00102 
00103 /*  LAMBDA    (input) DOUBLE PRECISION */
00104 /*           The shift. In order to compute an accurate eigenvector, */
00105 /*           LAMBDA should be a good approximation to an eigenvalue */
00106 /*           of L D L^T. */
00107 
00108 /*  L        (input) DOUBLE PRECISION array, dimension (N-1) */
00109 /*           The (n-1) subdiagonal elements of the unit bidiagonal matrix */
00110 /*           L, in elements 1 to N-1. */
00111 
00112 /*  D        (input) DOUBLE PRECISION array, dimension (N) */
00113 /*           The n diagonal elements of the diagonal matrix D. */
00114 
00115 /*  LD       (input) DOUBLE PRECISION array, dimension (N-1) */
00116 /*           The n-1 elements L(i)*D(i). */
00117 
00118 /*  LLD      (input) DOUBLE PRECISION array, dimension (N-1) */
00119 /*           The n-1 elements L(i)*L(i)*D(i). */
00120 
00121 /*  PIVMIN   (input) DOUBLE PRECISION */
00122 /*           The minimum pivot in the Sturm sequence. */
00123 
00124 /*  GAPTOL   (input) DOUBLE PRECISION */
00125 /*           Tolerance that indicates when eigenvector entries are negligible */
00126 /*           w.r.t. their contribution to the residual. */
00127 
00128 /*  Z        (input/output) DOUBLE PRECISION array, dimension (N) */
00129 /*           On input, all entries of Z must be set to 0. */
00130 /*           On output, Z contains the (scaled) r-th column of the */
00131 /*           inverse. The scaling is such that Z(R) equals 1. */
00132 
00133 /*  WANTNC   (input) LOGICAL */
00134 /*           Specifies whether NEGCNT has to be computed. */
00135 
00136 /*  NEGCNT   (output) INTEGER */
00137 /*           If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin */
00138 /*           in the  matrix factorization L D L^T, and NEGCNT = -1 otherwise. */
00139 
00140 /*  ZTZ      (output) DOUBLE PRECISION */
00141 /*           The square of the 2-norm of Z. */
00142 
00143 /*  MINGMA   (output) DOUBLE PRECISION */
00144 /*           The reciprocal of the largest (in magnitude) diagonal */
00145 /*           element of the inverse of L D L^T - sigma I. */
00146 
00147 /*  R        (input/output) INTEGER */
00148 /*           The twist index for the twisted factorization used to */
00149 /*           compute Z. */
00150 /*           On input, 0 <= R <= N. If R is input as 0, R is set to */
00151 /*           the index where (L D L^T - sigma I)^{-1} is largest */
00152 /*           in magnitude. If 1 <= R <= N, R is unchanged. */
00153 /*           On output, R contains the twist index used to compute Z. */
00154 /*           Ideally, R designates the position of the maximum entry in the */
00155 /*           eigenvector. */
00156 
00157 /*  ISUPPZ   (output) INTEGER array, dimension (2) */
00158 /*           The support of the vector in Z, i.e., the vector Z is */
00159 /*           nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). */
00160 
00161 /*  NRMINV   (output) DOUBLE PRECISION */
00162 /*           NRMINV = 1/SQRT( ZTZ ) */
00163 
00164 /*  RESID    (output) DOUBLE PRECISION */
00165 /*           The residual of the FP vector. */
00166 /*           RESID = ABS( MINGMA )/SQRT( ZTZ ) */
00167 
00168 /*  RQCORR   (output) DOUBLE PRECISION */
00169 /*           The Rayleigh Quotient correction to LAMBDA. */
00170 /*           RQCORR = MINGMA*TMP */
00171 
00172 /*  WORK     (workspace) DOUBLE PRECISION array, dimension (4*N) */
00173 
00174 /*  Further Details */
00175 /*  =============== */
00176 
00177 /*  Based on contributions by */
00178 /*     Beresford Parlett, University of California, Berkeley, USA */
00179 /*     Jim Demmel, University of California, Berkeley, USA */
00180 /*     Inderjit Dhillon, University of Texas, Austin, USA */
00181 /*     Osni Marques, LBNL/NERSC, USA */
00182 /*     Christof Voemel, University of California, Berkeley, USA */
00183 
00184 /*  ===================================================================== */
00185 
00186 /*     .. Parameters .. */
00187 /*     .. */
00188 /*     .. Local Scalars .. */
00189 /*     .. */
00190 /*     .. External Functions .. */
00191 /*     .. */
00192 /*     .. Intrinsic Functions .. */
00193 /*     .. */
00194 /*     .. Executable Statements .. */
00195 
00196     /* Parameter adjustments */
00197     --work;
00198     --isuppz;
00199     --z__;
00200     --lld;
00201     --ld;
00202     --l;
00203     --d__;
00204 
00205     /* Function Body */
00206     eps = template_lapack_lamch("Precision", (Treal)0);
00207     if (*r__ == 0) {
00208         r1 = *b1;
00209         r2 = *bn;
00210     } else {
00211         r1 = *r__;
00212         r2 = *r__;
00213     }
00214 /*     Storage for LPLUS */
00215     indlpl = 0;
00216 /*     Storage for UMINUS */
00217     indumn = *n;
00218     inds = (*n << 1) + 1;
00219     indp = *n * 3 + 1;
00220     if (*b1 == 1) {
00221         work[inds] = 0.;
00222     } else {
00223         work[inds + *b1 - 1] = lld[*b1 - 1];
00224     }
00225 
00226 /*     Compute the stationary transform (using the differential form) */
00227 /*     until the index R2. */
00228 
00229     sawnan1 = FALSE_;
00230     neg1 = 0;
00231     s = work[inds + *b1 - 1] - *lambda;
00232     i__1 = r1 - 1;
00233     for (i__ = *b1; i__ <= i__1; ++i__) {
00234         dplus = d__[i__] + s;
00235         work[indlpl + i__] = ld[i__] / dplus;
00236         if (dplus < 0.) {
00237             ++neg1;
00238         }
00239         work[inds + i__] = s * work[indlpl + i__] * l[i__];
00240         s = work[inds + i__] - *lambda;
00241 /* L50: */
00242     }
00243     sawnan1 = template_lapack_isnan(&s);
00244     if (sawnan1) {
00245         goto L60;
00246     }
00247     i__1 = r2 - 1;
00248     for (i__ = r1; i__ <= i__1; ++i__) {
00249         dplus = d__[i__] + s;
00250         work[indlpl + i__] = ld[i__] / dplus;
00251         work[inds + i__] = s * work[indlpl + i__] * l[i__];
00252         s = work[inds + i__] - *lambda;
00253 /* L51: */
00254     }
00255     sawnan1 = template_lapack_isnan(&s);
00256 
00257 L60:
00258     if (sawnan1) {
00259 /*        Runs a slower version of the above loop if a NaN is detected */
00260         neg1 = 0;
00261         s = work[inds + *b1 - 1] - *lambda;
00262         i__1 = r1 - 1;
00263         for (i__ = *b1; i__ <= i__1; ++i__) {
00264             dplus = d__[i__] + s;
00265             if (absMACRO(dplus) < *pivmin) {
00266                 dplus = -(*pivmin);
00267             }
00268             work[indlpl + i__] = ld[i__] / dplus;
00269             if (dplus < 0.) {
00270                 ++neg1;
00271             }
00272             work[inds + i__] = s * work[indlpl + i__] * l[i__];
00273             if (work[indlpl + i__] == 0.) {
00274                 work[inds + i__] = lld[i__];
00275             }
00276             s = work[inds + i__] - *lambda;
00277 /* L70: */
00278         }
00279         i__1 = r2 - 1;
00280         for (i__ = r1; i__ <= i__1; ++i__) {
00281             dplus = d__[i__] + s;
00282             if (absMACRO(dplus) < *pivmin) {
00283                 dplus = -(*pivmin);
00284             }
00285             work[indlpl + i__] = ld[i__] / dplus;
00286             work[inds + i__] = s * work[indlpl + i__] * l[i__];
00287             if (work[indlpl + i__] == 0.) {
00288                 work[inds + i__] = lld[i__];
00289             }
00290             s = work[inds + i__] - *lambda;
00291 /* L71: */
00292         }
00293     }
00294 
00295 /*     Compute the progressive transform (using the differential form) */
00296 /*     until the index R1 */
00297 
00298     sawnan2 = FALSE_;
00299     neg2 = 0;
00300     work[indp + *bn - 1] = d__[*bn] - *lambda;
00301     i__1 = r1;
00302     for (i__ = *bn - 1; i__ >= i__1; --i__) {
00303         dminus = lld[i__] + work[indp + i__];
00304         tmp = d__[i__] / dminus;
00305         if (dminus < 0.) {
00306             ++neg2;
00307         }
00308         work[indumn + i__] = l[i__] * tmp;
00309         work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda;
00310 /* L80: */
00311     }
00312     tmp = work[indp + r1 - 1];
00313     sawnan2 = template_lapack_isnan(&tmp);
00314     if (sawnan2) {
00315 /*        Runs a slower version of the above loop if a NaN is detected */
00316         neg2 = 0;
00317         i__1 = r1;
00318         for (i__ = *bn - 1; i__ >= i__1; --i__) {
00319             dminus = lld[i__] + work[indp + i__];
00320             if (absMACRO(dminus) < *pivmin) {
00321                 dminus = -(*pivmin);
00322             }
00323             tmp = d__[i__] / dminus;
00324             if (dminus < 0.) {
00325                 ++neg2;
00326             }
00327             work[indumn + i__] = l[i__] * tmp;
00328             work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda;
00329             if (tmp == 0.) {
00330                 work[indp + i__ - 1] = d__[i__] - *lambda;
00331             }
00332 /* L100: */
00333         }
00334     }
00335 
00336 /*     Find the index (from R1 to R2) of the largest (in magnitude) */
00337 /*     diagonal element of the inverse */
00338 
00339     *mingma = work[inds + r1 - 1] + work[indp + r1 - 1];
00340     if (*mingma < 0.) {
00341         ++neg1;
00342     }
00343     if (*wantnc) {
00344         *negcnt = neg1 + neg2;
00345     } else {
00346         *negcnt = -1;
00347     }
00348     if (absMACRO(*mingma) == 0.) {
00349         *mingma = eps * work[inds + r1 - 1];
00350     }
00351     *r__ = r1;
00352     i__1 = r2 - 1;
00353     for (i__ = r1; i__ <= i__1; ++i__) {
00354         tmp = work[inds + i__] + work[indp + i__];
00355         if (tmp == 0.) {
00356             tmp = eps * work[inds + i__];
00357         }
00358         if (absMACRO(tmp) <= absMACRO(*mingma)) {
00359             *mingma = tmp;
00360             *r__ = i__ + 1;
00361         }
00362 /* L110: */
00363     }
00364 
00365 /*     Compute the FP vector: solve N^T v = e_r */
00366 
00367     isuppz[1] = *b1;
00368     isuppz[2] = *bn;
00369     z__[*r__] = 1.;
00370     *ztz = 1.;
00371 
00372 /*     Compute the FP vector upwards from R */
00373 
00374     if (! sawnan1 && ! sawnan2) {
00375         i__1 = *b1;
00376         for (i__ = *r__ - 1; i__ >= i__1; --i__) {
00377             z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]);
00378             if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO(
00379                     d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) {
00380                 z__[i__] = 0.;
00381                 isuppz[1] = i__ + 1;
00382                 goto L220;
00383             }
00384             *ztz += z__[i__] * z__[i__];
00385 /* L210: */
00386         }
00387 L220:
00388         ;
00389     } else {
00390 /*        Run slower loop if NaN occurred. */
00391         i__1 = *b1;
00392         for (i__ = *r__ - 1; i__ >= i__1; --i__) {
00393             if (z__[i__ + 1] == 0.) {
00394                 z__[i__] = -(ld[i__ + 1] / ld[i__]) * z__[i__ + 2];
00395             } else {
00396                 z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]);
00397             }
00398             if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO(
00399                     d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) {
00400                 z__[i__] = 0.;
00401                 isuppz[1] = i__ + 1;
00402                 goto L240;
00403             }
00404             *ztz += z__[i__] * z__[i__];
00405 /* L230: */
00406         }
00407 L240:
00408         ;
00409     }
00410 /*     Compute the FP vector downwards from R in blocks of size BLKSIZ */
00411     if (! sawnan1 && ! sawnan2) {
00412         i__1 = *bn - 1;
00413         for (i__ = *r__; i__ <= i__1; ++i__) {
00414             z__[i__ + 1] = -(work[indumn + i__] * z__[i__]);
00415             if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO(
00416                     d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) {
00417                 z__[i__ + 1] = 0.;
00418                 isuppz[2] = i__;
00419                 goto L260;
00420             }
00421             *ztz += z__[i__ + 1] * z__[i__ + 1];
00422 /* L250: */
00423         }
00424 L260:
00425         ;
00426     } else {
00427 /*        Run slower loop if NaN occurred. */
00428         i__1 = *bn - 1;
00429         for (i__ = *r__; i__ <= i__1; ++i__) {
00430             if (z__[i__] == 0.) {
00431                 z__[i__ + 1] = -(ld[i__ - 1] / ld[i__]) * z__[i__ - 1];
00432             } else {
00433                 z__[i__ + 1] = -(work[indumn + i__] * z__[i__]);
00434             }
00435             if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO(
00436                     d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) {
00437                 z__[i__ + 1] = 0.;
00438                 isuppz[2] = i__;
00439                 goto L280;
00440             }
00441             *ztz += z__[i__ + 1] * z__[i__ + 1];
00442 /* L270: */
00443         }
00444 L280:
00445         ;
00446     }
00447 
00448 /*     Compute quantities for convergence test */
00449 
00450     tmp = 1. / *ztz;
00451     *nrminv = template_blas_sqrt(tmp);
00452     *resid = absMACRO(*mingma) * *nrminv;
00453     *rqcorr = *mingma * tmp;
00454 
00455 
00456     return 0;
00457 
00458 /*     End of DLAR1V */
00459 
00460 } /* dlar1v_ */
00461 
00462 #endif