ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_LARRJ_HEADER 00036 #define TEMPLATE_LAPACK_LARRJ_HEADER 00037 00038 template<class Treal> 00039 int template_lapack_larrj(integer *n, Treal *d__, Treal *e2, 00040 integer *ifirst, integer *ilast, Treal *rtol, integer *offset, 00041 Treal *w, Treal *werr, Treal *work, integer *iwork, 00042 Treal *pivmin, Treal *spdiam, integer *info) 00043 { 00044 /* System generated locals */ 00045 integer i__1, i__2; 00046 Treal d__1, d__2; 00047 00048 00049 /* Local variables */ 00050 integer i__, j, k, p; 00051 Treal s; 00052 integer i1, i2, ii; 00053 Treal fac, mid; 00054 integer cnt; 00055 Treal tmp, left; 00056 integer iter, nint, prev, next, savi1; 00057 Treal right, width, dplus; 00058 integer olnint, maxitr; 00059 00060 00061 /* -- LAPACK auxiliary routine (version 3.2) -- */ 00062 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ 00063 /* November 2006 */ 00064 00065 /* .. Scalar Arguments .. */ 00066 /* .. */ 00067 /* .. Array Arguments .. */ 00068 /* .. */ 00069 00070 /* Purpose */ 00071 /* ======= */ 00072 00073 /* Given the initial eigenvalue approximations of T, DLARRJ */ 00074 /* does bisection to refine the eigenvalues of T, */ 00075 /* W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */ 00076 /* guesses for these eigenvalues are input in W, the corresponding estimate */ 00077 /* of the error in these guesses in WERR. During bisection, intervals */ 00078 /* [left, right] are maintained by storing their mid-points and */ 00079 /* semi-widths in the arrays W and WERR respectively. */ 00080 00081 /* Arguments */ 00082 /* ========= */ 00083 00084 /* N (input) INTEGER */ 00085 /* The order of the matrix. */ 00086 00087 /* D (input) DOUBLE PRECISION array, dimension (N) */ 00088 /* The N diagonal elements of T. */ 00089 00090 /* E2 (input) DOUBLE PRECISION array, dimension (N-1) */ 00091 /* The Squares of the (N-1) subdiagonal elements of T. */ 00092 00093 /* IFIRST (input) INTEGER */ 00094 /* The index of the first eigenvalue to be computed. */ 00095 00096 /* ILAST (input) INTEGER */ 00097 /* The index of the last eigenvalue to be computed. */ 00098 00099 /* RTOL (input) DOUBLE PRECISION */ 00100 /* Tolerance for the convergence of the bisection intervals. */ 00101 /* An interval [LEFT,RIGHT] has converged if */ 00102 /* RIGHT-LEFT.LT.RTOL*MAX(|LEFT|,|RIGHT|). */ 00103 00104 /* OFFSET (input) INTEGER */ 00105 /* Offset for the arrays W and WERR, i.e., the IFIRST-OFFSET */ 00106 /* through ILAST-OFFSET elements of these arrays are to be used. */ 00107 00108 /* W (input/output) DOUBLE PRECISION array, dimension (N) */ 00109 /* On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */ 00110 /* estimates of the eigenvalues of L D L^T indexed IFIRST through */ 00111 /* ILAST. */ 00112 /* On output, these estimates are refined. */ 00113 00114 /* WERR (input/output) DOUBLE PRECISION array, dimension (N) */ 00115 /* On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */ 00116 /* the errors in the estimates of the corresponding elements in W. */ 00117 /* On output, these errors are refined. */ 00118 00119 /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */ 00120 /* Workspace. */ 00121 00122 /* IWORK (workspace) INTEGER array, dimension (2*N) */ 00123 /* Workspace. */ 00124 00125 /* PIVMIN (input) DOUBLE PRECISION */ 00126 /* The minimum pivot in the Sturm sequence for T. */ 00127 00128 /* SPDIAM (input) DOUBLE PRECISION */ 00129 /* The spectral diameter of T. */ 00130 00131 /* INFO (output) INTEGER */ 00132 /* Error flag. */ 00133 00134 /* Further Details */ 00135 /* =============== */ 00136 00137 /* Based on contributions by */ 00138 /* Beresford Parlett, University of California, Berkeley, USA */ 00139 /* Jim Demmel, University of California, Berkeley, USA */ 00140 /* Inderjit Dhillon, University of Texas, Austin, USA */ 00141 /* Osni Marques, LBNL/NERSC, USA */ 00142 /* Christof Voemel, University of California, Berkeley, USA */ 00143 00144 /* ===================================================================== */ 00145 00146 /* .. Parameters .. */ 00147 /* .. */ 00148 /* .. Local Scalars .. */ 00149 00150 /* .. */ 00151 /* .. Intrinsic Functions .. */ 00152 /* .. */ 00153 /* .. Executable Statements .. */ 00154 00155 /* Parameter adjustments */ 00156 --iwork; 00157 --work; 00158 --werr; 00159 --w; 00160 --e2; 00161 --d__; 00162 00163 /* Function Body */ 00164 *info = 0; 00165 00166 maxitr = (integer) ((template_blas_log(*spdiam + *pivmin) - template_blas_log(*pivmin)) / template_blas_log(2.)) + 00167 2; 00168 00169 /* Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */ 00170 /* The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */ 00171 /* Count( WORK(2*I) ) is stored in IWORK( 2*I ). The integer IWORK( 2*I-1 ) */ 00172 /* for an unconverged interval is set to the index of the next unconverged */ 00173 /* interval, and is -1 or 0 for a converged interval. Thus a linked */ 00174 /* list of unconverged intervals is set up. */ 00175 00176 i1 = *ifirst; 00177 i2 = *ilast; 00178 /* The number of unconverged intervals */ 00179 nint = 0; 00180 /* The last unconverged interval found */ 00181 prev = 0; 00182 i__1 = i2; 00183 for (i__ = i1; i__ <= i__1; ++i__) { 00184 k = i__ << 1; 00185 ii = i__ - *offset; 00186 left = w[ii] - werr[ii]; 00187 mid = w[ii]; 00188 right = w[ii] + werr[ii]; 00189 width = right - mid; 00190 /* Computing MAX */ 00191 d__1 = absMACRO(left), d__2 = absMACRO(right); 00192 tmp = maxMACRO(d__1,d__2); 00193 /* The following test prevents the test of converged intervals */ 00194 if (width < *rtol * tmp) { 00195 /* This interval has already converged and does not need refinement. */ 00196 /* (Note that the gaps might change through refining the */ 00197 /* eigenvalues, however, they can only get bigger.) */ 00198 /* Remove it from the list. */ 00199 iwork[k - 1] = -1; 00200 /* Make sure that I1 always points to the first unconverged interval */ 00201 if (i__ == i1 && i__ < i2) { 00202 i1 = i__ + 1; 00203 } 00204 if (prev >= i1 && i__ <= i2) { 00205 iwork[(prev << 1) - 1] = i__ + 1; 00206 } 00207 } else { 00208 /* unconverged interval found */ 00209 prev = i__; 00210 /* Make sure that [LEFT,RIGHT] contains the desired eigenvalue */ 00211 00212 /* Do while( CNT(LEFT).GT.I-1 ) */ 00213 00214 fac = 1.; 00215 L20: 00216 cnt = 0; 00217 s = left; 00218 dplus = d__[1] - s; 00219 if (dplus < 0.) { 00220 ++cnt; 00221 } 00222 i__2 = *n; 00223 for (j = 2; j <= i__2; ++j) { 00224 dplus = d__[j] - s - e2[j - 1] / dplus; 00225 if (dplus < 0.) { 00226 ++cnt; 00227 } 00228 /* L30: */ 00229 } 00230 if (cnt > i__ - 1) { 00231 left -= werr[ii] * fac; 00232 fac *= 2.; 00233 goto L20; 00234 } 00235 00236 /* Do while( CNT(RIGHT).LT.I ) */ 00237 00238 fac = 1.; 00239 L50: 00240 cnt = 0; 00241 s = right; 00242 dplus = d__[1] - s; 00243 if (dplus < 0.) { 00244 ++cnt; 00245 } 00246 i__2 = *n; 00247 for (j = 2; j <= i__2; ++j) { 00248 dplus = d__[j] - s - e2[j - 1] / dplus; 00249 if (dplus < 0.) { 00250 ++cnt; 00251 } 00252 /* L60: */ 00253 } 00254 if (cnt < i__) { 00255 right += werr[ii] * fac; 00256 fac *= 2.; 00257 goto L50; 00258 } 00259 ++nint; 00260 iwork[k - 1] = i__ + 1; 00261 iwork[k] = cnt; 00262 } 00263 work[k - 1] = left; 00264 work[k] = right; 00265 /* L75: */ 00266 } 00267 savi1 = i1; 00268 00269 /* Do while( NINT.GT.0 ), i.e. there are still unconverged intervals */ 00270 /* and while (ITER.LT.MAXITR) */ 00271 00272 iter = 0; 00273 L80: 00274 prev = i1 - 1; 00275 i__ = i1; 00276 olnint = nint; 00277 i__1 = olnint; 00278 for (p = 1; p <= i__1; ++p) { 00279 k = i__ << 1; 00280 ii = i__ - *offset; 00281 next = iwork[k - 1]; 00282 left = work[k - 1]; 00283 right = work[k]; 00284 mid = (left + right) * .5; 00285 /* semiwidth of interval */ 00286 width = right - mid; 00287 /* Computing MAX */ 00288 d__1 = absMACRO(left), d__2 = absMACRO(right); 00289 tmp = maxMACRO(d__1,d__2); 00290 if (width < *rtol * tmp || iter == maxitr) { 00291 /* reduce number of unconverged intervals */ 00292 --nint; 00293 /* Mark interval as converged. */ 00294 iwork[k - 1] = 0; 00295 if (i1 == i__) { 00296 i1 = next; 00297 } else { 00298 /* Prev holds the last unconverged interval previously examined */ 00299 if (prev >= i1) { 00300 iwork[(prev << 1) - 1] = next; 00301 } 00302 } 00303 i__ = next; 00304 goto L100; 00305 } 00306 prev = i__; 00307 00308 /* Perform one bisection step */ 00309 00310 cnt = 0; 00311 s = mid; 00312 dplus = d__[1] - s; 00313 if (dplus < 0.) { 00314 ++cnt; 00315 } 00316 i__2 = *n; 00317 for (j = 2; j <= i__2; ++j) { 00318 dplus = d__[j] - s - e2[j - 1] / dplus; 00319 if (dplus < 0.) { 00320 ++cnt; 00321 } 00322 /* L90: */ 00323 } 00324 if (cnt <= i__ - 1) { 00325 work[k - 1] = mid; 00326 } else { 00327 work[k] = mid; 00328 } 00329 i__ = next; 00330 L100: 00331 ; 00332 } 00333 ++iter; 00334 /* do another loop if there are still unconverged intervals */ 00335 /* However, in the last iteration, all intervals are accepted */ 00336 /* since this is the best we can do. */ 00337 if (nint > 0 && iter <= maxitr) { 00338 goto L80; 00339 } 00340 00341 00342 /* At this point, all the intervals have converged */ 00343 i__1 = *ilast; 00344 for (i__ = savi1; i__ <= i__1; ++i__) { 00345 k = i__ << 1; 00346 ii = i__ - *offset; 00347 /* All intervals marked by '0' have been refined. */ 00348 if (iwork[k - 1] == 0) { 00349 w[ii] = (work[k - 1] + work[k]) * .5; 00350 werr[ii] = work[k] - w[ii]; 00351 } 00352 /* L110: */ 00353 } 00354 00355 return 0; 00356 00357 /* End of DLARRJ */ 00358 00359 } /* dlarrj_ */ 00360 00361 #endif