ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_LAR1V_HEADER 00036 #define TEMPLATE_LAPACK_LAR1V_HEADER 00037 00038 template<class Treal> 00039 int template_lapack_lar1v(integer *n, integer *b1, integer *bn, Treal 00040 *lambda, Treal *d__, Treal *l, Treal *ld, Treal * 00041 lld, Treal *pivmin, Treal *gaptol, Treal *z__, logical 00042 *wantnc, integer *negcnt, Treal *ztz, Treal *mingma, 00043 integer *r__, integer *isuppz, Treal *nrminv, Treal *resid, 00044 Treal *rqcorr, Treal *work) 00045 { 00046 /* System generated locals */ 00047 integer i__1; 00048 Treal d__1, d__2, d__3; 00049 00050 00051 /* Local variables */ 00052 integer i__; 00053 Treal s; 00054 integer r1, r2; 00055 Treal eps, tmp; 00056 integer neg1, neg2, indp, inds; 00057 Treal dplus; 00058 integer indlpl, indumn; 00059 Treal dminus; 00060 logical sawnan1, sawnan2; 00061 00062 00063 /* -- LAPACK auxiliary routine (version 3.2) -- */ 00064 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ 00065 /* November 2006 */ 00066 00067 /* .. Scalar Arguments .. */ 00068 /* .. */ 00069 /* .. Array Arguments .. */ 00070 /* .. */ 00071 00072 /* Purpose */ 00073 /* ======= */ 00074 00075 /* DLAR1V computes the (scaled) r-th column of the inverse of */ 00076 /* the sumbmatrix in rows B1 through BN of the tridiagonal matrix */ 00077 /* L D L^T - sigma I. When sigma is close to an eigenvalue, the */ 00078 /* computed vector is an accurate eigenvector. Usually, r corresponds */ 00079 /* to the index where the eigenvector is largest in magnitude. */ 00080 /* The following steps accomplish this computation : */ 00081 /* (a) Stationary qd transform, L D L^T - sigma I = L(+) D(+) L(+)^T, */ 00082 /* (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T, */ 00083 /* (c) Computation of the diagonal elements of the inverse of */ 00084 /* L D L^T - sigma I by combining the above transforms, and choosing */ 00085 /* r as the index where the diagonal of the inverse is (one of the) */ 00086 /* largest in magnitude. */ 00087 /* (d) Computation of the (scaled) r-th column of the inverse using the */ 00088 /* twisted factorization obtained by combining the top part of the */ 00089 /* the stationary and the bottom part of the progressive transform. */ 00090 00091 /* Arguments */ 00092 /* ========= */ 00093 00094 /* N (input) INTEGER */ 00095 /* The order of the matrix L D L^T. */ 00096 00097 /* B1 (input) INTEGER */ 00098 /* First index of the submatrix of L D L^T. */ 00099 00100 /* BN (input) INTEGER */ 00101 /* Last index of the submatrix of L D L^T. */ 00102 00103 /* LAMBDA (input) DOUBLE PRECISION */ 00104 /* The shift. In order to compute an accurate eigenvector, */ 00105 /* LAMBDA should be a good approximation to an eigenvalue */ 00106 /* of L D L^T. */ 00107 00108 /* L (input) DOUBLE PRECISION array, dimension (N-1) */ 00109 /* The (n-1) subdiagonal elements of the unit bidiagonal matrix */ 00110 /* L, in elements 1 to N-1. */ 00111 00112 /* D (input) DOUBLE PRECISION array, dimension (N) */ 00113 /* The n diagonal elements of the diagonal matrix D. */ 00114 00115 /* LD (input) DOUBLE PRECISION array, dimension (N-1) */ 00116 /* The n-1 elements L(i)*D(i). */ 00117 00118 /* LLD (input) DOUBLE PRECISION array, dimension (N-1) */ 00119 /* The n-1 elements L(i)*L(i)*D(i). */ 00120 00121 /* PIVMIN (input) DOUBLE PRECISION */ 00122 /* The minimum pivot in the Sturm sequence. */ 00123 00124 /* GAPTOL (input) DOUBLE PRECISION */ 00125 /* Tolerance that indicates when eigenvector entries are negligible */ 00126 /* w.r.t. their contribution to the residual. */ 00127 00128 /* Z (input/output) DOUBLE PRECISION array, dimension (N) */ 00129 /* On input, all entries of Z must be set to 0. */ 00130 /* On output, Z contains the (scaled) r-th column of the */ 00131 /* inverse. The scaling is such that Z(R) equals 1. */ 00132 00133 /* WANTNC (input) LOGICAL */ 00134 /* Specifies whether NEGCNT has to be computed. */ 00135 00136 /* NEGCNT (output) INTEGER */ 00137 /* If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin */ 00138 /* in the matrix factorization L D L^T, and NEGCNT = -1 otherwise. */ 00139 00140 /* ZTZ (output) DOUBLE PRECISION */ 00141 /* The square of the 2-norm of Z. */ 00142 00143 /* MINGMA (output) DOUBLE PRECISION */ 00144 /* The reciprocal of the largest (in magnitude) diagonal */ 00145 /* element of the inverse of L D L^T - sigma I. */ 00146 00147 /* R (input/output) INTEGER */ 00148 /* The twist index for the twisted factorization used to */ 00149 /* compute Z. */ 00150 /* On input, 0 <= R <= N. If R is input as 0, R is set to */ 00151 /* the index where (L D L^T - sigma I)^{-1} is largest */ 00152 /* in magnitude. If 1 <= R <= N, R is unchanged. */ 00153 /* On output, R contains the twist index used to compute Z. */ 00154 /* Ideally, R designates the position of the maximum entry in the */ 00155 /* eigenvector. */ 00156 00157 /* ISUPPZ (output) INTEGER array, dimension (2) */ 00158 /* The support of the vector in Z, i.e., the vector Z is */ 00159 /* nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). */ 00160 00161 /* NRMINV (output) DOUBLE PRECISION */ 00162 /* NRMINV = 1/SQRT( ZTZ ) */ 00163 00164 /* RESID (output) DOUBLE PRECISION */ 00165 /* The residual of the FP vector. */ 00166 /* RESID = ABS( MINGMA )/SQRT( ZTZ ) */ 00167 00168 /* RQCORR (output) DOUBLE PRECISION */ 00169 /* The Rayleigh Quotient correction to LAMBDA. */ 00170 /* RQCORR = MINGMA*TMP */ 00171 00172 /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */ 00173 00174 /* Further Details */ 00175 /* =============== */ 00176 00177 /* Based on contributions by */ 00178 /* Beresford Parlett, University of California, Berkeley, USA */ 00179 /* Jim Demmel, University of California, Berkeley, USA */ 00180 /* Inderjit Dhillon, University of Texas, Austin, USA */ 00181 /* Osni Marques, LBNL/NERSC, USA */ 00182 /* Christof Voemel, University of California, Berkeley, USA */ 00183 00184 /* ===================================================================== */ 00185 00186 /* .. Parameters .. */ 00187 /* .. */ 00188 /* .. Local Scalars .. */ 00189 /* .. */ 00190 /* .. External Functions .. */ 00191 /* .. */ 00192 /* .. Intrinsic Functions .. */ 00193 /* .. */ 00194 /* .. Executable Statements .. */ 00195 00196 /* Parameter adjustments */ 00197 --work; 00198 --isuppz; 00199 --z__; 00200 --lld; 00201 --ld; 00202 --l; 00203 --d__; 00204 00205 /* Function Body */ 00206 eps = template_lapack_lamch("Precision", (Treal)0); 00207 if (*r__ == 0) { 00208 r1 = *b1; 00209 r2 = *bn; 00210 } else { 00211 r1 = *r__; 00212 r2 = *r__; 00213 } 00214 /* Storage for LPLUS */ 00215 indlpl = 0; 00216 /* Storage for UMINUS */ 00217 indumn = *n; 00218 inds = (*n << 1) + 1; 00219 indp = *n * 3 + 1; 00220 if (*b1 == 1) { 00221 work[inds] = 0.; 00222 } else { 00223 work[inds + *b1 - 1] = lld[*b1 - 1]; 00224 } 00225 00226 /* Compute the stationary transform (using the differential form) */ 00227 /* until the index R2. */ 00228 00229 sawnan1 = FALSE_; 00230 neg1 = 0; 00231 s = work[inds + *b1 - 1] - *lambda; 00232 i__1 = r1 - 1; 00233 for (i__ = *b1; i__ <= i__1; ++i__) { 00234 dplus = d__[i__] + s; 00235 work[indlpl + i__] = ld[i__] / dplus; 00236 if (dplus < 0.) { 00237 ++neg1; 00238 } 00239 work[inds + i__] = s * work[indlpl + i__] * l[i__]; 00240 s = work[inds + i__] - *lambda; 00241 /* L50: */ 00242 } 00243 sawnan1 = template_lapack_isnan(&s); 00244 if (sawnan1) { 00245 goto L60; 00246 } 00247 i__1 = r2 - 1; 00248 for (i__ = r1; i__ <= i__1; ++i__) { 00249 dplus = d__[i__] + s; 00250 work[indlpl + i__] = ld[i__] / dplus; 00251 work[inds + i__] = s * work[indlpl + i__] * l[i__]; 00252 s = work[inds + i__] - *lambda; 00253 /* L51: */ 00254 } 00255 sawnan1 = template_lapack_isnan(&s); 00256 00257 L60: 00258 if (sawnan1) { 00259 /* Runs a slower version of the above loop if a NaN is detected */ 00260 neg1 = 0; 00261 s = work[inds + *b1 - 1] - *lambda; 00262 i__1 = r1 - 1; 00263 for (i__ = *b1; i__ <= i__1; ++i__) { 00264 dplus = d__[i__] + s; 00265 if (absMACRO(dplus) < *pivmin) { 00266 dplus = -(*pivmin); 00267 } 00268 work[indlpl + i__] = ld[i__] / dplus; 00269 if (dplus < 0.) { 00270 ++neg1; 00271 } 00272 work[inds + i__] = s * work[indlpl + i__] * l[i__]; 00273 if (work[indlpl + i__] == 0.) { 00274 work[inds + i__] = lld[i__]; 00275 } 00276 s = work[inds + i__] - *lambda; 00277 /* L70: */ 00278 } 00279 i__1 = r2 - 1; 00280 for (i__ = r1; i__ <= i__1; ++i__) { 00281 dplus = d__[i__] + s; 00282 if (absMACRO(dplus) < *pivmin) { 00283 dplus = -(*pivmin); 00284 } 00285 work[indlpl + i__] = ld[i__] / dplus; 00286 work[inds + i__] = s * work[indlpl + i__] * l[i__]; 00287 if (work[indlpl + i__] == 0.) { 00288 work[inds + i__] = lld[i__]; 00289 } 00290 s = work[inds + i__] - *lambda; 00291 /* L71: */ 00292 } 00293 } 00294 00295 /* Compute the progressive transform (using the differential form) */ 00296 /* until the index R1 */ 00297 00298 sawnan2 = FALSE_; 00299 neg2 = 0; 00300 work[indp + *bn - 1] = d__[*bn] - *lambda; 00301 i__1 = r1; 00302 for (i__ = *bn - 1; i__ >= i__1; --i__) { 00303 dminus = lld[i__] + work[indp + i__]; 00304 tmp = d__[i__] / dminus; 00305 if (dminus < 0.) { 00306 ++neg2; 00307 } 00308 work[indumn + i__] = l[i__] * tmp; 00309 work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda; 00310 /* L80: */ 00311 } 00312 tmp = work[indp + r1 - 1]; 00313 sawnan2 = template_lapack_isnan(&tmp); 00314 if (sawnan2) { 00315 /* Runs a slower version of the above loop if a NaN is detected */ 00316 neg2 = 0; 00317 i__1 = r1; 00318 for (i__ = *bn - 1; i__ >= i__1; --i__) { 00319 dminus = lld[i__] + work[indp + i__]; 00320 if (absMACRO(dminus) < *pivmin) { 00321 dminus = -(*pivmin); 00322 } 00323 tmp = d__[i__] / dminus; 00324 if (dminus < 0.) { 00325 ++neg2; 00326 } 00327 work[indumn + i__] = l[i__] * tmp; 00328 work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda; 00329 if (tmp == 0.) { 00330 work[indp + i__ - 1] = d__[i__] - *lambda; 00331 } 00332 /* L100: */ 00333 } 00334 } 00335 00336 /* Find the index (from R1 to R2) of the largest (in magnitude) */ 00337 /* diagonal element of the inverse */ 00338 00339 *mingma = work[inds + r1 - 1] + work[indp + r1 - 1]; 00340 if (*mingma < 0.) { 00341 ++neg1; 00342 } 00343 if (*wantnc) { 00344 *negcnt = neg1 + neg2; 00345 } else { 00346 *negcnt = -1; 00347 } 00348 if (absMACRO(*mingma) == 0.) { 00349 *mingma = eps * work[inds + r1 - 1]; 00350 } 00351 *r__ = r1; 00352 i__1 = r2 - 1; 00353 for (i__ = r1; i__ <= i__1; ++i__) { 00354 tmp = work[inds + i__] + work[indp + i__]; 00355 if (tmp == 0.) { 00356 tmp = eps * work[inds + i__]; 00357 } 00358 if (absMACRO(tmp) <= absMACRO(*mingma)) { 00359 *mingma = tmp; 00360 *r__ = i__ + 1; 00361 } 00362 /* L110: */ 00363 } 00364 00365 /* Compute the FP vector: solve N^T v = e_r */ 00366 00367 isuppz[1] = *b1; 00368 isuppz[2] = *bn; 00369 z__[*r__] = 1.; 00370 *ztz = 1.; 00371 00372 /* Compute the FP vector upwards from R */ 00373 00374 if (! sawnan1 && ! sawnan2) { 00375 i__1 = *b1; 00376 for (i__ = *r__ - 1; i__ >= i__1; --i__) { 00377 z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]); 00378 if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO( 00379 d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) { 00380 z__[i__] = 0.; 00381 isuppz[1] = i__ + 1; 00382 goto L220; 00383 } 00384 *ztz += z__[i__] * z__[i__]; 00385 /* L210: */ 00386 } 00387 L220: 00388 ; 00389 } else { 00390 /* Run slower loop if NaN occurred. */ 00391 i__1 = *b1; 00392 for (i__ = *r__ - 1; i__ >= i__1; --i__) { 00393 if (z__[i__ + 1] == 0.) { 00394 z__[i__] = -(ld[i__ + 1] / ld[i__]) * z__[i__ + 2]; 00395 } else { 00396 z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]); 00397 } 00398 if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO( 00399 d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) { 00400 z__[i__] = 0.; 00401 isuppz[1] = i__ + 1; 00402 goto L240; 00403 } 00404 *ztz += z__[i__] * z__[i__]; 00405 /* L230: */ 00406 } 00407 L240: 00408 ; 00409 } 00410 /* Compute the FP vector downwards from R in blocks of size BLKSIZ */ 00411 if (! sawnan1 && ! sawnan2) { 00412 i__1 = *bn - 1; 00413 for (i__ = *r__; i__ <= i__1; ++i__) { 00414 z__[i__ + 1] = -(work[indumn + i__] * z__[i__]); 00415 if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO( 00416 d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) { 00417 z__[i__ + 1] = 0.; 00418 isuppz[2] = i__; 00419 goto L260; 00420 } 00421 *ztz += z__[i__ + 1] * z__[i__ + 1]; 00422 /* L250: */ 00423 } 00424 L260: 00425 ; 00426 } else { 00427 /* Run slower loop if NaN occurred. */ 00428 i__1 = *bn - 1; 00429 for (i__ = *r__; i__ <= i__1; ++i__) { 00430 if (z__[i__] == 0.) { 00431 z__[i__ + 1] = -(ld[i__ - 1] / ld[i__]) * z__[i__ - 1]; 00432 } else { 00433 z__[i__ + 1] = -(work[indumn + i__] * z__[i__]); 00434 } 00435 if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO( 00436 d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) { 00437 z__[i__ + 1] = 0.; 00438 isuppz[2] = i__; 00439 goto L280; 00440 } 00441 *ztz += z__[i__ + 1] * z__[i__ + 1]; 00442 /* L270: */ 00443 } 00444 L280: 00445 ; 00446 } 00447 00448 /* Compute quantities for convergence test */ 00449 00450 tmp = 1. / *ztz; 00451 *nrminv = template_blas_sqrt(tmp); 00452 *resid = absMACRO(*mingma) * *nrminv; 00453 *rqcorr = *mingma * tmp; 00454 00455 00456 return 0; 00457 00458 /* End of DLAR1V */ 00459 00460 } /* dlar1v_ */ 00461 00462 #endif