ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_LARRF_HEADER 00036 #define TEMPLATE_LAPACK_LARRF_HEADER 00037 00038 00039 template<class Treal> 00040 int template_lapack_larrf(integer *n, Treal *d__, Treal *l, 00041 Treal *ld, integer *clstrt, integer *clend, Treal *w, 00042 Treal *wgap, Treal *werr, Treal *spdiam, Treal * 00043 clgapl, Treal *clgapr, Treal *pivmin, Treal *sigma, 00044 Treal *dplus, Treal *lplus, Treal *work, integer *info) 00045 { 00046 /* System generated locals */ 00047 integer i__1; 00048 Treal d__1, d__2, d__3; 00049 00050 /* Local variables */ 00051 integer i__; 00052 Treal s, bestshift, smlgrowth, eps, tmp, max1, max2, rrr1, rrr2, 00053 znm2, growthbound, fail, fact, oldp; 00054 integer indx; 00055 Treal prod; 00056 integer ktry; 00057 Treal fail2, avgap, ldmax, rdmax; 00058 integer shift; 00059 logical dorrr1; 00060 Treal ldelta; 00061 logical nofail; 00062 Treal mingap, lsigma, rdelta; 00063 logical forcer; 00064 Treal rsigma, clwdth; 00065 logical sawnan1, sawnan2, tryrrr1; 00066 00067 00068 /* -- LAPACK auxiliary routine (version 3.2) -- */ 00069 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ 00070 /* November 2006 */ 00071 /* * */ 00072 /* .. Scalar Arguments .. */ 00073 /* .. */ 00074 /* .. Array Arguments .. */ 00075 /* .. */ 00076 00077 /* Purpose */ 00078 /* ======= */ 00079 00080 /* Given the initial representation L D L^T and its cluster of close */ 00081 /* eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ... */ 00082 /* W( CLEND ), DLARRF finds a new relatively robust representation */ 00083 /* L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the */ 00084 /* eigenvalues of L(+) D(+) L(+)^T is relatively isolated. */ 00085 00086 /* Arguments */ 00087 /* ========= */ 00088 00089 /* N (input) INTEGER */ 00090 /* The order of the matrix (subblock, if the matrix splitted). */ 00091 00092 /* D (input) DOUBLE PRECISION array, dimension (N) */ 00093 /* The N diagonal elements of the diagonal matrix D. */ 00094 00095 /* L (input) DOUBLE PRECISION array, dimension (N-1) */ 00096 /* The (N-1) subdiagonal elements of the unit bidiagonal */ 00097 /* matrix L. */ 00098 00099 /* LD (input) DOUBLE PRECISION array, dimension (N-1) */ 00100 /* The (N-1) elements L(i)*D(i). */ 00101 00102 /* CLSTRT (input) INTEGER */ 00103 /* The index of the first eigenvalue in the cluster. */ 00104 00105 /* CLEND (input) INTEGER */ 00106 /* The index of the last eigenvalue in the cluster. */ 00107 00108 /* W (input) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */ 00109 /* The eigenvalue APPROXIMATIONS of L D L^T in ascending order. */ 00110 /* W( CLSTRT ) through W( CLEND ) form the cluster of relatively */ 00111 /* close eigenalues. */ 00112 00113 /* WGAP (input/output) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */ 00114 /* The separation from the right neighbor eigenvalue in W. */ 00115 00116 /* WERR (input) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */ 00117 /* WERR contain the semiwidth of the uncertainty */ 00118 /* interval of the corresponding eigenvalue APPROXIMATION in W */ 00119 00120 /* SPDIAM (input) estimate of the spectral diameter obtained from the */ 00121 /* Gerschgorin intervals */ 00122 00123 /* CLGAPL, CLGAPR (input) absolute gap on each end of the cluster. */ 00124 /* Set by the calling routine to protect against shifts too close */ 00125 /* to eigenvalues outside the cluster. */ 00126 00127 /* PIVMIN (input) DOUBLE PRECISION */ 00128 /* The minimum pivot allowed in the Sturm sequence. */ 00129 00130 /* SIGMA (output) DOUBLE PRECISION */ 00131 /* The shift used to form L(+) D(+) L(+)^T. */ 00132 00133 /* DPLUS (output) DOUBLE PRECISION array, dimension (N) */ 00134 /* The N diagonal elements of the diagonal matrix D(+). */ 00135 00136 /* LPLUS (output) DOUBLE PRECISION array, dimension (N-1) */ 00137 /* The first (N-1) elements of LPLUS contain the subdiagonal */ 00138 /* elements of the unit bidiagonal matrix L(+). */ 00139 00140 /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ 00141 /* Workspace. */ 00142 00143 /* Further Details */ 00144 /* =============== */ 00145 00146 /* Based on contributions by */ 00147 /* Beresford Parlett, University of California, Berkeley, USA */ 00148 /* Jim Demmel, University of California, Berkeley, USA */ 00149 /* Inderjit Dhillon, University of Texas, Austin, USA */ 00150 /* Osni Marques, LBNL/NERSC, USA */ 00151 /* Christof Voemel, University of California, Berkeley, USA */ 00152 00153 /* ===================================================================== */ 00154 00155 /* .. Parameters .. */ 00156 /* .. */ 00157 /* .. Local Scalars .. */ 00158 /* .. */ 00159 /* .. External Functions .. */ 00160 /* .. */ 00161 /* .. External Subroutines .. */ 00162 /* .. */ 00163 /* .. Intrinsic Functions .. */ 00164 /* .. */ 00165 /* .. Executable Statements .. */ 00166 00167 /* Table of constant values */ 00168 00169 integer c__1 = 1; 00170 00171 00172 00173 /* Parameter adjustments */ 00174 --work; 00175 --lplus; 00176 --dplus; 00177 --werr; 00178 --wgap; 00179 --w; 00180 --ld; 00181 --l; 00182 --d__; 00183 00184 /* Initialization added by Elias to get rid of compiler warnings. */ 00185 indx = 0; 00186 /* Function Body */ 00187 *info = 0; 00188 fact = 2.; 00189 eps = template_lapack_lamch("Precision", (Treal)0); 00190 shift = 0; 00191 forcer = FALSE_; 00192 /* Note that we cannot guarantee that for any of the shifts tried, */ 00193 /* the factorization has a small or even moderate element growth. */ 00194 /* There could be Ritz values at both ends of the cluster and despite */ 00195 /* backing off, there are examples where all factorizations tried */ 00196 /* (in IEEE mode, allowing zero pivots & infinities) have INFINITE */ 00197 /* element growth. */ 00198 /* For this reason, we should use PIVMIN in this subroutine so that at */ 00199 /* least the L D L^T factorization exists. It can be checked afterwards */ 00200 /* whether the element growth caused bad residuals/orthogonality. */ 00201 /* Decide whether the code should accept the best among all */ 00202 /* representations despite large element growth or signal INFO=1 */ 00203 nofail = TRUE_; 00204 00205 /* Compute the average gap length of the cluster */ 00206 clwdth = (d__1 = w[*clend] - w[*clstrt], absMACRO(d__1)) + werr[*clend] + werr[ 00207 *clstrt]; 00208 avgap = clwdth / (Treal) (*clend - *clstrt); 00209 mingap = minMACRO(*clgapl,*clgapr); 00210 /* Initial values for shifts to both ends of cluster */ 00211 /* Computing MIN */ 00212 d__1 = w[*clstrt], d__2 = w[*clend]; 00213 lsigma = minMACRO(d__1,d__2) - werr[*clstrt]; 00214 /* Computing MAX */ 00215 d__1 = w[*clstrt], d__2 = w[*clend]; 00216 rsigma = maxMACRO(d__1,d__2) + werr[*clend]; 00217 /* Use a small fudge to make sure that we really shift to the outside */ 00218 lsigma -= absMACRO(lsigma) * 4. * eps; 00219 rsigma += absMACRO(rsigma) * 4. * eps; 00220 /* Compute upper bounds for how much to back off the initial shifts */ 00221 ldmax = mingap * .25 + *pivmin * 2.; 00222 rdmax = mingap * .25 + *pivmin * 2.; 00223 /* Computing MAX */ 00224 d__1 = avgap, d__2 = wgap[*clstrt]; 00225 ldelta = maxMACRO(d__1,d__2) / fact; 00226 /* Computing MAX */ 00227 d__1 = avgap, d__2 = wgap[*clend - 1]; 00228 rdelta = maxMACRO(d__1,d__2) / fact; 00229 00230 /* Initialize the record of the best representation found */ 00231 00232 s = template_lapack_lamch("S", (Treal)0); 00233 smlgrowth = 1. / s; 00234 fail = (Treal) (*n - 1) * mingap / (*spdiam * eps); 00235 fail2 = (Treal) (*n - 1) * mingap / (*spdiam * template_blas_sqrt(eps)); 00236 bestshift = lsigma; 00237 00238 /* while (KTRY <= KTRYMAX) */ 00239 ktry = 0; 00240 growthbound = *spdiam * 8.; 00241 L5: 00242 sawnan1 = FALSE_; 00243 sawnan2 = FALSE_; 00244 /* Ensure that we do not back off too much of the initial shifts */ 00245 ldelta = minMACRO(ldmax,ldelta); 00246 rdelta = minMACRO(rdmax,rdelta); 00247 /* Compute the element growth when shifting to both ends of the cluster */ 00248 /* accept the shift if there is no element growth at one of the two ends */ 00249 /* Left end */ 00250 s = -lsigma; 00251 dplus[1] = d__[1] + s; 00252 if (absMACRO(dplus[1]) < *pivmin) { 00253 dplus[1] = -(*pivmin); 00254 /* Need to set SAWNAN1 because refined RRR test should not be used */ 00255 /* in this case */ 00256 sawnan1 = TRUE_; 00257 } 00258 max1 = absMACRO(dplus[1]); 00259 i__1 = *n - 1; 00260 for (i__ = 1; i__ <= i__1; ++i__) { 00261 lplus[i__] = ld[i__] / dplus[i__]; 00262 s = s * lplus[i__] * l[i__] - lsigma; 00263 dplus[i__ + 1] = d__[i__ + 1] + s; 00264 if ((d__1 = dplus[i__ + 1], absMACRO(d__1)) < *pivmin) { 00265 dplus[i__ + 1] = -(*pivmin); 00266 /* Need to set SAWNAN1 because refined RRR test should not be used */ 00267 /* in this case */ 00268 sawnan1 = TRUE_; 00269 } 00270 /* Computing MAX */ 00271 d__2 = max1, d__3 = (d__1 = dplus[i__ + 1], absMACRO(d__1)); 00272 max1 = maxMACRO(d__2,d__3); 00273 /* L6: */ 00274 } 00275 sawnan1 = sawnan1 || template_lapack_isnan(&max1); 00276 if (forcer || ( max1 <= growthbound && ! sawnan1 ) ) { 00277 *sigma = lsigma; 00278 shift = 1; 00279 goto L100; 00280 } 00281 /* Right end */ 00282 s = -rsigma; 00283 work[1] = d__[1] + s; 00284 if (absMACRO(work[1]) < *pivmin) { 00285 work[1] = -(*pivmin); 00286 /* Need to set SAWNAN2 because refined RRR test should not be used */ 00287 /* in this case */ 00288 sawnan2 = TRUE_; 00289 } 00290 max2 = absMACRO(work[1]); 00291 i__1 = *n - 1; 00292 for (i__ = 1; i__ <= i__1; ++i__) { 00293 work[*n + i__] = ld[i__] / work[i__]; 00294 s = s * work[*n + i__] * l[i__] - rsigma; 00295 work[i__ + 1] = d__[i__ + 1] + s; 00296 if ((d__1 = work[i__ + 1], absMACRO(d__1)) < *pivmin) { 00297 work[i__ + 1] = -(*pivmin); 00298 /* Need to set SAWNAN2 because refined RRR test should not be used */ 00299 /* in this case */ 00300 sawnan2 = TRUE_; 00301 } 00302 /* Computing MAX */ 00303 d__2 = max2, d__3 = (d__1 = work[i__ + 1], absMACRO(d__1)); 00304 max2 = maxMACRO(d__2,d__3); 00305 /* L7: */ 00306 } 00307 sawnan2 = sawnan2 || template_lapack_isnan(&max2); 00308 if (forcer || ( max2 <= growthbound && ! sawnan2 ) ) { 00309 *sigma = rsigma; 00310 shift = 2; 00311 goto L100; 00312 } 00313 /* If we are at this point, both shifts led to too much element growth */ 00314 /* Record the better of the two shifts (provided it didn't lead to NaN) */ 00315 if (sawnan1 && sawnan2) { 00316 /* both MAX1 and MAX2 are NaN */ 00317 goto L50; 00318 } else { 00319 if (! sawnan1) { 00320 indx = 1; 00321 if (max1 <= smlgrowth) { 00322 smlgrowth = max1; 00323 bestshift = lsigma; 00324 } 00325 } 00326 if (! sawnan2) { 00327 if (sawnan1 || max2 <= max1) { 00328 indx = 2; 00329 } 00330 if (max2 <= smlgrowth) { 00331 smlgrowth = max2; 00332 bestshift = rsigma; 00333 } 00334 } 00335 } 00336 /* If we are here, both the left and the right shift led to */ 00337 /* element growth. If the element growth is moderate, then */ 00338 /* we may still accept the representation, if it passes a */ 00339 /* refined test for RRR. This test supposes that no NaN occurred. */ 00340 /* Moreover, we use the refined RRR test only for isolated clusters. */ 00341 if (clwdth < mingap / 128. && minMACRO(max1,max2) < fail2 && ! sawnan1 && ! 00342 sawnan2) { 00343 dorrr1 = TRUE_; 00344 } else { 00345 dorrr1 = FALSE_; 00346 } 00347 tryrrr1 = TRUE_; 00348 if (tryrrr1 && dorrr1) { 00349 if (indx == 1) { 00350 tmp = (d__1 = dplus[*n], absMACRO(d__1)); 00351 znm2 = 1.; 00352 prod = 1.; 00353 oldp = 1.; 00354 for (i__ = *n - 1; i__ >= 1; --i__) { 00355 if (prod <= eps) { 00356 prod = dplus[i__ + 1] * work[*n + i__ + 1] / (dplus[i__] * 00357 work[*n + i__]) * oldp; 00358 } else { 00359 prod *= (d__1 = work[*n + i__], absMACRO(d__1)); 00360 } 00361 oldp = prod; 00362 /* Computing 2nd power */ 00363 d__1 = prod; 00364 znm2 += d__1 * d__1; 00365 /* Computing MAX */ 00366 d__2 = tmp, d__3 = (d__1 = dplus[i__] * prod, absMACRO(d__1)); 00367 tmp = maxMACRO(d__2,d__3); 00368 /* L15: */ 00369 } 00370 rrr1 = tmp / (*spdiam * template_blas_sqrt(znm2)); 00371 if (rrr1 <= 8.) { 00372 *sigma = lsigma; 00373 shift = 1; 00374 goto L100; 00375 } 00376 } else if (indx == 2) { 00377 tmp = (d__1 = work[*n], absMACRO(d__1)); 00378 znm2 = 1.; 00379 prod = 1.; 00380 oldp = 1.; 00381 for (i__ = *n - 1; i__ >= 1; --i__) { 00382 if (prod <= eps) { 00383 prod = work[i__ + 1] * lplus[i__ + 1] / (work[i__] * 00384 lplus[i__]) * oldp; 00385 } else { 00386 prod *= (d__1 = lplus[i__], absMACRO(d__1)); 00387 } 00388 oldp = prod; 00389 /* Computing 2nd power */ 00390 d__1 = prod; 00391 znm2 += d__1 * d__1; 00392 /* Computing MAX */ 00393 d__2 = tmp, d__3 = (d__1 = work[i__] * prod, absMACRO(d__1)); 00394 tmp = maxMACRO(d__2,d__3); 00395 /* L16: */ 00396 } 00397 rrr2 = tmp / (*spdiam * template_blas_sqrt(znm2)); 00398 if (rrr2 <= 8.) { 00399 *sigma = rsigma; 00400 shift = 2; 00401 goto L100; 00402 } 00403 } 00404 } 00405 L50: 00406 if (ktry < 1) { 00407 /* If we are here, both shifts failed also the RRR test. */ 00408 /* Back off to the outside */ 00409 /* Computing MAX */ 00410 d__1 = lsigma - ldelta, d__2 = lsigma - ldmax; 00411 lsigma = maxMACRO(d__1,d__2); 00412 /* Computing MIN */ 00413 d__1 = rsigma + rdelta, d__2 = rsigma + rdmax; 00414 rsigma = minMACRO(d__1,d__2); 00415 ldelta *= 2.; 00416 rdelta *= 2.; 00417 ++ktry; 00418 goto L5; 00419 } else { 00420 /* None of the representations investigated satisfied our */ 00421 /* criteria. Take the best one we found. */ 00422 if (smlgrowth < fail || nofail) { 00423 lsigma = bestshift; 00424 rsigma = bestshift; 00425 forcer = TRUE_; 00426 goto L5; 00427 } else { 00428 *info = 1; 00429 return 0; 00430 } 00431 } 00432 L100: 00433 if (shift == 1) { 00434 } else if (shift == 2) { 00435 /* store new L and D back into DPLUS, LPLUS */ 00436 template_blas_copy(n, &work[1], &c__1, &dplus[1], &c__1); 00437 i__1 = *n - 1; 00438 template_blas_copy(&i__1, &work[*n + 1], &c__1, &lplus[1], &c__1); 00439 } 00440 return 0; 00441 00442 /* End of DLARRF */ 00443 00444 } /* dlarrf_ */ 00445 00446 #endif