ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_LARRV_HEADER 00036 #define TEMPLATE_LAPACK_LARRV_HEADER 00037 00038 template<class Treal> 00039 int template_lapack_larrv(const integer *n, Treal *vl, Treal *vu, 00040 Treal *d__, Treal *l, Treal *pivmin, integer *isplit, 00041 integer *m, integer *dol, integer *dou, Treal *minrgp, 00042 Treal *rtol1, Treal *rtol2, Treal *w, Treal *werr, 00043 Treal *wgap, integer *iblock, integer *indexw, Treal *gers, 00044 Treal *z__, const integer *ldz, integer *isuppz, Treal *work, 00045 integer *iwork, integer *info) 00046 { 00047 /* System generated locals */ 00048 integer z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; 00049 Treal d__1, d__2; 00050 logical L__1; 00051 00052 00053 /* Local variables */ 00054 integer minwsize, i__, j, k, p, q, miniwsize, ii; 00055 Treal gl; 00056 integer im, in; 00057 Treal gu, gap, eps, tau, tol, tmp; 00058 integer zto; 00059 Treal ztz; 00060 integer iend, jblk; 00061 Treal lgap; 00062 integer done; 00063 Treal rgap, left; 00064 integer wend, iter; 00065 Treal bstw = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning 00066 integer itmp1; 00067 integer indld; 00068 Treal fudge; 00069 integer idone; 00070 Treal sigma; 00071 integer iinfo, iindr; 00072 Treal resid; 00073 logical eskip; 00074 Treal right; 00075 integer nclus, zfrom; 00076 Treal rqtol; 00077 integer iindc1, iindc2; 00078 logical stp2ii; 00079 Treal lambda; 00080 integer ibegin, indeig; 00081 logical needbs; 00082 integer indlld; 00083 Treal sgndef, mingma; 00084 integer oldien, oldncl, wbegin; 00085 Treal spdiam; 00086 integer negcnt; 00087 integer oldcls; 00088 Treal savgap; 00089 integer ndepth; 00090 Treal ssigma; 00091 logical usedbs; 00092 integer iindwk, offset; 00093 Treal gaptol; 00094 integer newcls, oldfst, indwrk, windex, oldlst; 00095 logical usedrq; 00096 integer newfst, newftt, parity, windmn, windpl, isupmn, newlst, zusedl; 00097 Treal bstres = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning 00098 integer newsiz, zusedu, zusedw; 00099 Treal nrminv, rqcorr; 00100 logical tryrqc; 00101 integer isupmx; 00102 00103 00104 /* -- LAPACK auxiliary routine (version 3.2) -- */ 00105 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ 00106 /* November 2006 */ 00107 00108 /* .. Scalar Arguments .. */ 00109 /* .. */ 00110 /* .. Array Arguments .. */ 00111 /* .. */ 00112 00113 /* Purpose */ 00114 /* ======= */ 00115 00116 /* DLARRV computes the eigenvectors of the tridiagonal matrix */ 00117 /* T = L D L^T given L, D and APPROXIMATIONS to the eigenvalues of L D L^T. */ 00118 /* The input eigenvalues should have been computed by DLARRE. */ 00119 00120 /* Arguments */ 00121 /* ========= */ 00122 00123 /* N (input) INTEGER */ 00124 /* The order of the matrix. N >= 0. */ 00125 00126 /* VL (input) DOUBLE PRECISION */ 00127 /* VU (input) DOUBLE PRECISION */ 00128 /* Lower and upper bounds of the interval that contains the desired */ 00129 /* eigenvalues. VL < VU. Needed to compute gaps on the left or right */ 00130 /* end of the extremal eigenvalues in the desired RANGE. */ 00131 00132 /* D (input/output) DOUBLE PRECISION array, dimension (N) */ 00133 /* On entry, the N diagonal elements of the diagonal matrix D. */ 00134 /* On exit, D may be overwritten. */ 00135 00136 /* L (input/output) DOUBLE PRECISION array, dimension (N) */ 00137 /* On entry, the (N-1) subdiagonal elements of the unit */ 00138 /* bidiagonal matrix L are in elements 1 to N-1 of L */ 00139 /* (if the matrix is not splitted.) At the end of each block */ 00140 /* is stored the corresponding shift as given by DLARRE. */ 00141 /* On exit, L is overwritten. */ 00142 00143 /* PIVMIN (in) DOUBLE PRECISION */ 00144 /* The minimum pivot allowed in the Sturm sequence. */ 00145 00146 /* ISPLIT (input) INTEGER array, dimension (N) */ 00147 /* The splitting points, at which T breaks up into blocks. */ 00148 /* The first block consists of rows/columns 1 to */ 00149 /* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 */ 00150 /* through ISPLIT( 2 ), etc. */ 00151 00152 /* M (input) INTEGER */ 00153 /* The total number of input eigenvalues. 0 <= M <= N. */ 00154 00155 /* DOL (input) INTEGER */ 00156 /* DOU (input) INTEGER */ 00157 /* If the user wants to compute only selected eigenvectors from all */ 00158 /* the eigenvalues supplied, he can specify an index range DOL:DOU. */ 00159 /* Or else the setting DOL=1, DOU=M should be applied. */ 00160 /* Note that DOL and DOU refer to the order in which the eigenvalues */ 00161 /* are stored in W. */ 00162 /* If the user wants to compute only selected eigenpairs, then */ 00163 /* the columns DOL-1 to DOU+1 of the eigenvector space Z contain the */ 00164 /* computed eigenvectors. All other columns of Z are set to zero. */ 00165 00166 /* MINRGP (input) DOUBLE PRECISION */ 00167 00168 /* RTOL1 (input) DOUBLE PRECISION */ 00169 /* RTOL2 (input) DOUBLE PRECISION */ 00170 /* Parameters for bisection. */ 00171 /* An interval [LEFT,RIGHT] has converged if */ 00172 /* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */ 00173 00174 /* W (input/output) DOUBLE PRECISION array, dimension (N) */ 00175 /* The first M elements of W contain the APPROXIMATE eigenvalues for */ 00176 /* which eigenvectors are to be computed. The eigenvalues */ 00177 /* should be grouped by split-off block and ordered from */ 00178 /* smallest to largest within the block ( The output array */ 00179 /* W from DLARRE is expected here ). Furthermore, they are with */ 00180 /* respect to the shift of the corresponding root representation */ 00181 /* for their block. On exit, W holds the eigenvalues of the */ 00182 /* UNshifted matrix. */ 00183 00184 /* WERR (input/output) DOUBLE PRECISION array, dimension (N) */ 00185 /* The first M elements contain the semiwidth of the uncertainty */ 00186 /* interval of the corresponding eigenvalue in W */ 00187 00188 /* WGAP (input/output) DOUBLE PRECISION array, dimension (N) */ 00189 /* The separation from the right neighbor eigenvalue in W. */ 00190 00191 /* IBLOCK (input) INTEGER array, dimension (N) */ 00192 /* The indices of the blocks (submatrices) associated with the */ 00193 /* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */ 00194 /* W(i) belongs to the first block from the top, =2 if W(i) */ 00195 /* belongs to the second block, etc. */ 00196 00197 /* INDEXW (input) INTEGER array, dimension (N) */ 00198 /* The indices of the eigenvalues within each block (submatrix); */ 00199 /* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */ 00200 /* i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. */ 00201 00202 /* GERS (input) DOUBLE PRECISION array, dimension (2*N) */ 00203 /* The N Gerschgorin intervals (the i-th Gerschgorin interval */ 00204 /* is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should */ 00205 /* be computed from the original UNshifted matrix. */ 00206 00207 /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */ 00208 /* If INFO = 0, the first M columns of Z contain the */ 00209 /* orthonormal eigenvectors of the matrix T */ 00210 /* corresponding to the input eigenvalues, with the i-th */ 00211 /* column of Z holding the eigenvector associated with W(i). */ 00212 /* Note: the user must ensure that at least max(1,M) columns are */ 00213 /* supplied in the array Z. */ 00214 00215 /* LDZ (input) INTEGER */ 00216 /* The leading dimension of the array Z. LDZ >= 1, and if */ 00217 /* JOBZ = 'V', LDZ >= max(1,N). */ 00218 00219 /* ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) */ 00220 /* The support of the eigenvectors in Z, i.e., the indices */ 00221 /* indicating the nonzero elements in Z. The I-th eigenvector */ 00222 /* is nonzero only in elements ISUPPZ( 2*I-1 ) through */ 00223 /* ISUPPZ( 2*I ). */ 00224 00225 /* WORK (workspace) DOUBLE PRECISION array, dimension (12*N) */ 00226 00227 /* IWORK (workspace) INTEGER array, dimension (7*N) */ 00228 00229 /* INFO (output) INTEGER */ 00230 /* = 0: successful exit */ 00231 00232 /* > 0: A problem occured in DLARRV. */ 00233 /* < 0: One of the called subroutines signaled an internal problem. */ 00234 /* Needs inspection of the corresponding parameter IINFO */ 00235 /* for further information. */ 00236 00237 /* =-1: Problem in DLARRB when refining a child's eigenvalues. */ 00238 /* =-2: Problem in DLARRF when computing the RRR of a child. */ 00239 /* When a child is inside a tight cluster, it can be difficult */ 00240 /* to find an RRR. A partial remedy from the user's point of */ 00241 /* view is to make the parameter MINRGP smaller and recompile. */ 00242 /* However, as the orthogonality of the computed vectors is */ 00243 /* proportional to 1/MINRGP, the user should be aware that */ 00244 /* he might be trading in precision when he decreases MINRGP. */ 00245 /* =-3: Problem in DLARRB when refining a single eigenvalue */ 00246 /* after the Rayleigh correction was rejected. */ 00247 /* = 5: The Rayleigh Quotient Iteration failed to converge to */ 00248 /* full accuracy in MAXITR steps. */ 00249 00250 /* Further Details */ 00251 /* =============== */ 00252 00253 /* Based on contributions by */ 00254 /* Beresford Parlett, University of California, Berkeley, USA */ 00255 /* Jim Demmel, University of California, Berkeley, USA */ 00256 /* Inderjit Dhillon, University of Texas, Austin, USA */ 00257 /* Osni Marques, LBNL/NERSC, USA */ 00258 /* Christof Voemel, University of California, Berkeley, USA */ 00259 00260 /* ===================================================================== */ 00261 00262 /* .. Parameters .. */ 00263 /* .. */ 00264 /* .. Local Scalars .. */ 00265 /* .. */ 00266 /* .. External Functions .. */ 00267 /* .. */ 00268 /* .. External Subroutines .. */ 00269 /* .. */ 00270 /* .. Intrinsic Functions .. */ 00271 /* .. */ 00272 /* .. Executable Statements .. */ 00273 /* .. */ 00274 /* The first N entries of WORK are reserved for the eigenvalues */ 00275 00276 /* Table of constant values */ 00277 00278 Treal c_b5 = 0.; 00279 integer c__1 = 1; 00280 integer c__2 = 2; 00281 00282 00283 /* Parameter adjustments */ 00284 --d__; 00285 --l; 00286 --isplit; 00287 --w; 00288 --werr; 00289 --wgap; 00290 --iblock; 00291 --indexw; 00292 --gers; 00293 z_dim1 = *ldz; 00294 z_offset = 1 + z_dim1; 00295 z__ -= z_offset; 00296 --isuppz; 00297 --work; 00298 --iwork; 00299 00300 /* Function Body */ 00301 indld = *n + 1; 00302 indlld = (*n << 1) + 1; 00303 indwrk = *n * 3 + 1; 00304 minwsize = *n * 12; 00305 i__1 = minwsize; 00306 for (i__ = 1; i__ <= i__1; ++i__) { 00307 work[i__] = 0.; 00308 /* L5: */ 00309 } 00310 /* IWORK(IINDR+1:IINDR+N) hold the twist indices R for the */ 00311 /* factorization used to compute the FP vector */ 00312 iindr = 0; 00313 /* IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current */ 00314 /* layer and the one above. */ 00315 iindc1 = *n; 00316 iindc2 = *n << 1; 00317 iindwk = *n * 3 + 1; 00318 miniwsize = *n * 7; 00319 i__1 = miniwsize; 00320 for (i__ = 1; i__ <= i__1; ++i__) { 00321 iwork[i__] = 0; 00322 /* L10: */ 00323 } 00324 zusedl = 1; 00325 if (*dol > 1) { 00326 /* Set lower bound for use of Z */ 00327 zusedl = *dol - 1; 00328 } 00329 zusedu = *m; 00330 if (*dou < *m) { 00331 /* Set lower bound for use of Z */ 00332 zusedu = *dou + 1; 00333 } 00334 /* The width of the part of Z that is used */ 00335 zusedw = zusedu - zusedl + 1; 00336 template_lapack_laset("Full", n, &zusedw, &c_b5, &c_b5, &z__[zusedl * z_dim1 + 1], ldz); 00337 eps = template_lapack_lamch("Precision",(Treal)0); 00338 rqtol = eps * 2.; 00339 00340 /* Set expert flags for standard code. */ 00341 tryrqc = TRUE_; 00342 if (*dol == 1 && *dou == *m) { 00343 } else { 00344 /* Only selected eigenpairs are computed. Since the other evalues */ 00345 /* are not refined by RQ iteration, bisection has to compute to full */ 00346 /* accuracy. */ 00347 *rtol1 = eps * 4.; 00348 *rtol2 = eps * 4.; 00349 } 00350 /* The entries WBEGIN:WEND in W, WERR, WGAP correspond to the */ 00351 /* desired eigenvalues. The support of the nonzero eigenvector */ 00352 /* entries is contained in the interval IBEGIN:IEND. */ 00353 /* Remark that if k eigenpairs are desired, then the eigenvectors */ 00354 /* are stored in k contiguous columns of Z. */ 00355 /* DONE is the number of eigenvectors already computed */ 00356 done = 0; 00357 ibegin = 1; 00358 wbegin = 1; 00359 i__1 = iblock[*m]; 00360 for (jblk = 1; jblk <= i__1; ++jblk) { 00361 iend = isplit[jblk]; 00362 sigma = l[iend]; 00363 /* Find the eigenvectors of the submatrix indexed IBEGIN */ 00364 /* through IEND. */ 00365 wend = wbegin - 1; 00366 L15: 00367 if (wend < *m) { 00368 if (iblock[wend + 1] == jblk) { 00369 ++wend; 00370 goto L15; 00371 } 00372 } 00373 if (wend < wbegin) { 00374 ibegin = iend + 1; 00375 goto L170; 00376 } else if (wend < *dol || wbegin > *dou) { 00377 ibegin = iend + 1; 00378 wbegin = wend + 1; 00379 goto L170; 00380 } 00381 /* Find local spectral diameter of the block */ 00382 gl = gers[(ibegin << 1) - 1]; 00383 gu = gers[ibegin * 2]; 00384 i__2 = iend; 00385 for (i__ = ibegin + 1; i__ <= i__2; ++i__) { 00386 /* Computing MIN */ 00387 d__1 = gers[(i__ << 1) - 1]; 00388 gl = minMACRO(d__1,gl); 00389 /* Computing MAX */ 00390 d__1 = gers[i__ * 2]; 00391 gu = maxMACRO(d__1,gu); 00392 /* L20: */ 00393 } 00394 spdiam = gu - gl; 00395 /* OLDIEN is the last index of the previous block */ 00396 oldien = ibegin - 1; 00397 /* Calculate the size of the current block */ 00398 in = iend - ibegin + 1; 00399 /* The number of eigenvalues in the current block */ 00400 im = wend - wbegin + 1; 00401 /* This is for a 1x1 block */ 00402 if (ibegin == iend) { 00403 ++done; 00404 z__[ibegin + wbegin * z_dim1] = 1.; 00405 isuppz[(wbegin << 1) - 1] = ibegin; 00406 isuppz[wbegin * 2] = ibegin; 00407 w[wbegin] += sigma; 00408 work[wbegin] = w[wbegin]; 00409 ibegin = iend + 1; 00410 ++wbegin; 00411 goto L170; 00412 } 00413 /* The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) */ 00414 /* Note that these can be approximations, in this case, the corresp. */ 00415 /* entries of WERR give the size of the uncertainty interval. */ 00416 /* The eigenvalue approximations will be refined when necessary as */ 00417 /* high relative accuracy is required for the computation of the */ 00418 /* corresponding eigenvectors. */ 00419 template_blas_copy(&im, &w[wbegin], &c__1, &work[wbegin], &c__1); 00420 /* We store in W the eigenvalue approximations w.r.t. the original */ 00421 /* matrix T. */ 00422 i__2 = im; 00423 for (i__ = 1; i__ <= i__2; ++i__) { 00424 w[wbegin + i__ - 1] += sigma; 00425 /* L30: */ 00426 } 00427 /* NDEPTH is the current depth of the representation tree */ 00428 ndepth = 0; 00429 /* PARITY is either 1 or 0 */ 00430 parity = 1; 00431 /* NCLUS is the number of clusters for the next level of the */ 00432 /* representation tree, we start with NCLUS = 1 for the root */ 00433 nclus = 1; 00434 iwork[iindc1 + 1] = 1; 00435 iwork[iindc1 + 2] = im; 00436 /* IDONE is the number of eigenvectors already computed in the current */ 00437 /* block */ 00438 idone = 0; 00439 /* loop while( IDONE.LT.IM ) */ 00440 /* generate the representation tree for the current block and */ 00441 /* compute the eigenvectors */ 00442 L40: 00443 if (idone < im) { 00444 /* This is a crude protection against infinitely deep trees */ 00445 if (ndepth > *m) { 00446 *info = -2; 00447 return 0; 00448 } 00449 /* breadth first processing of the current level of the representation */ 00450 /* tree: OLDNCL = number of clusters on current level */ 00451 oldncl = nclus; 00452 /* reset NCLUS to count the number of child clusters */ 00453 nclus = 0; 00454 00455 parity = 1 - parity; 00456 if (parity == 0) { 00457 oldcls = iindc1; 00458 newcls = iindc2; 00459 } else { 00460 oldcls = iindc2; 00461 newcls = iindc1; 00462 } 00463 /* Process the clusters on the current level */ 00464 i__2 = oldncl; 00465 for (i__ = 1; i__ <= i__2; ++i__) { 00466 j = oldcls + (i__ << 1); 00467 /* OLDFST, OLDLST = first, last index of current cluster. */ 00468 /* cluster indices start with 1 and are relative */ 00469 /* to WBEGIN when accessing W, WGAP, WERR, Z */ 00470 oldfst = iwork[j - 1]; 00471 oldlst = iwork[j]; 00472 if (ndepth > 0) { 00473 /* Retrieve relatively robust representation (RRR) of cluster */ 00474 /* that has been computed at the previous level */ 00475 /* The RRR is stored in Z and overwritten once the eigenvectors */ 00476 /* have been computed or when the cluster is refined */ 00477 if (*dol == 1 && *dou == *m) { 00478 /* Get representation from location of the leftmost evalue */ 00479 /* of the cluster */ 00480 j = wbegin + oldfst - 1; 00481 } else { 00482 if (wbegin + oldfst - 1 < *dol) { 00483 /* Get representation from the left end of Z array */ 00484 j = *dol - 1; 00485 } else if (wbegin + oldfst - 1 > *dou) { 00486 /* Get representation from the right end of Z array */ 00487 j = *dou; 00488 } else { 00489 j = wbegin + oldfst - 1; 00490 } 00491 } 00492 template_blas_copy(&in, &z__[ibegin + j * z_dim1], &c__1, &d__[ibegin] 00493 , &c__1); 00494 i__3 = in - 1; 00495 template_blas_copy(&i__3, &z__[ibegin + (j + 1) * z_dim1], &c__1, &l[ 00496 ibegin], &c__1); 00497 sigma = z__[iend + (j + 1) * z_dim1]; 00498 /* Set the corresponding entries in Z to zero */ 00499 template_lapack_laset("Full", &in, &c__2, &c_b5, &c_b5, &z__[ibegin + j 00500 * z_dim1], ldz); 00501 } 00502 /* Compute DL and DLL of current RRR */ 00503 i__3 = iend - 1; 00504 for (j = ibegin; j <= i__3; ++j) { 00505 tmp = d__[j] * l[j]; 00506 work[indld - 1 + j] = tmp; 00507 work[indlld - 1 + j] = tmp * l[j]; 00508 /* L50: */ 00509 } 00510 if (ndepth > 0) { 00511 /* P and Q are index of the first and last eigenvalue to compute */ 00512 /* within the current block */ 00513 p = indexw[wbegin - 1 + oldfst]; 00514 q = indexw[wbegin - 1 + oldlst]; 00515 /* Offset for the arrays WORK, WGAP and WERR, i.e., th P-OFFSET */ 00516 /* thru' Q-OFFSET elements of these arrays are to be used. */ 00517 /* OFFSET = P-OLDFST */ 00518 offset = indexw[wbegin] - 1; 00519 /* perform limited bisection (if necessary) to get approximate */ 00520 /* eigenvalues to the precision needed. */ 00521 template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin - 1], &p, 00522 &q, rtol1, rtol2, &offset, &work[wbegin], &wgap[ 00523 wbegin], &werr[wbegin], &work[indwrk], &iwork[ 00524 iindwk], pivmin, &spdiam, &in, &iinfo); 00525 if (iinfo != 0) { 00526 *info = -1; 00527 return 0; 00528 } 00529 /* We also recompute the extremal gaps. W holds all eigenvalues */ 00530 /* of the unshifted matrix and must be used for computation */ 00531 /* of WGAP, the entries of WORK might stem from RRRs with */ 00532 /* different shifts. The gaps from WBEGIN-1+OLDFST to */ 00533 /* WBEGIN-1+OLDLST are correctly computed in DLARRB. */ 00534 /* However, we only allow the gaps to become greater since */ 00535 /* this is what should happen when we decrease WERR */ 00536 if (oldfst > 1) { 00537 /* Computing MAX */ 00538 d__1 = wgap[wbegin + oldfst - 2], d__2 = w[wbegin + 00539 oldfst - 1] - werr[wbegin + oldfst - 1] - w[ 00540 wbegin + oldfst - 2] - werr[wbegin + oldfst - 00541 2]; 00542 wgap[wbegin + oldfst - 2] = maxMACRO(d__1,d__2); 00543 } 00544 if (wbegin + oldlst - 1 < wend) { 00545 /* Computing MAX */ 00546 d__1 = wgap[wbegin + oldlst - 1], d__2 = w[wbegin + 00547 oldlst] - werr[wbegin + oldlst] - w[wbegin + 00548 oldlst - 1] - werr[wbegin + oldlst - 1]; 00549 wgap[wbegin + oldlst - 1] = maxMACRO(d__1,d__2); 00550 } 00551 /* Each time the eigenvalues in WORK get refined, we store */ 00552 /* the newly found approximation with all shifts applied in W */ 00553 i__3 = oldlst; 00554 for (j = oldfst; j <= i__3; ++j) { 00555 w[wbegin + j - 1] = work[wbegin + j - 1] + sigma; 00556 /* L53: */ 00557 } 00558 } 00559 /* Process the current node. */ 00560 newfst = oldfst; 00561 i__3 = oldlst; 00562 for (j = oldfst; j <= i__3; ++j) { 00563 if (j == oldlst) { 00564 /* we are at the right end of the cluster, this is also the */ 00565 /* boundary of the child cluster */ 00566 newlst = j; 00567 } else if (wgap[wbegin + j - 1] >= *minrgp * (d__1 = work[ 00568 wbegin + j - 1], absMACRO(d__1))) { 00569 /* the right relative gap is big enough, the child cluster */ 00570 /* (NEWFST,..,NEWLST) is well separated from the following */ 00571 newlst = j; 00572 } else { 00573 /* inside a child cluster, the relative gap is not */ 00574 /* big enough. */ 00575 goto L140; 00576 } 00577 /* Compute size of child cluster found */ 00578 newsiz = newlst - newfst + 1; 00579 /* NEWFTT is the place in Z where the new RRR or the computed */ 00580 /* eigenvector is to be stored */ 00581 if (*dol == 1 && *dou == *m) { 00582 /* Store representation at location of the leftmost evalue */ 00583 /* of the cluster */ 00584 newftt = wbegin + newfst - 1; 00585 } else { 00586 if (wbegin + newfst - 1 < *dol) { 00587 /* Store representation at the left end of Z array */ 00588 newftt = *dol - 1; 00589 } else if (wbegin + newfst - 1 > *dou) { 00590 /* Store representation at the right end of Z array */ 00591 newftt = *dou; 00592 } else { 00593 newftt = wbegin + newfst - 1; 00594 } 00595 } 00596 if (newsiz > 1) { 00597 00598 /* Current child is not a singleton but a cluster. */ 00599 /* Compute and store new representation of child. */ 00600 00601 00602 /* Compute left and right cluster gap. */ 00603 00604 /* LGAP and RGAP are not computed from WORK because */ 00605 /* the eigenvalue approximations may stem from RRRs */ 00606 /* different shifts. However, W hold all eigenvalues */ 00607 /* of the unshifted matrix. Still, the entries in WGAP */ 00608 /* have to be computed from WORK since the entries */ 00609 /* in W might be of the same order so that gaps are not */ 00610 /* exhibited correctly for very close eigenvalues. */ 00611 if (newfst == 1) { 00612 /* Computing MAX */ 00613 d__1 = 0., d__2 = w[wbegin] - werr[wbegin] - *vl; 00614 lgap = maxMACRO(d__1,d__2); 00615 } else { 00616 lgap = wgap[wbegin + newfst - 2]; 00617 } 00618 rgap = wgap[wbegin + newlst - 1]; 00619 00620 /* Compute left- and rightmost eigenvalue of child */ 00621 /* to high precision in order to shift as close */ 00622 /* as possible and obtain as large relative gaps */ 00623 /* as possible */ 00624 00625 for (k = 1; k <= 2; ++k) { 00626 if (k == 1) { 00627 p = indexw[wbegin - 1 + newfst]; 00628 } else { 00629 p = indexw[wbegin - 1 + newlst]; 00630 } 00631 offset = indexw[wbegin] - 1; 00632 template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin 00633 - 1], &p, &p, &rqtol, &rqtol, &offset, & 00634 work[wbegin], &wgap[wbegin], &werr[wbegin] 00635 , &work[indwrk], &iwork[iindwk], pivmin, & 00636 spdiam, &in, &iinfo); 00637 /* L55: */ 00638 } 00639 00640 if (wbegin + newlst - 1 < *dol || wbegin + newfst - 1 00641 > *dou) { 00642 /* if the cluster contains no desired eigenvalues */ 00643 /* skip the computation of that branch of the rep. tree */ 00644 00645 /* We could skip before the refinement of the extremal */ 00646 /* eigenvalues of the child, but then the representation */ 00647 /* tree could be different from the one when nothing is */ 00648 /* skipped. For this reason we skip at this place. */ 00649 idone = idone + newlst - newfst + 1; 00650 goto L139; 00651 } 00652 00653 /* Compute RRR of child cluster. */ 00654 /* Note that the new RRR is stored in Z */ 00655 00656 /* DLARRF needs LWORK = 2*N */ 00657 template_lapack_larrf(&in, &d__[ibegin], &l[ibegin], &work[indld + 00658 ibegin - 1], &newfst, &newlst, &work[wbegin], 00659 &wgap[wbegin], &werr[wbegin], &spdiam, &lgap, 00660 &rgap, pivmin, &tau, &z__[ibegin + newftt * 00661 z_dim1], &z__[ibegin + (newftt + 1) * z_dim1], 00662 &work[indwrk], &iinfo); 00663 if (iinfo == 0) { 00664 /* a new RRR for the cluster was found by DLARRF */ 00665 /* update shift and store it */ 00666 ssigma = sigma + tau; 00667 z__[iend + (newftt + 1) * z_dim1] = ssigma; 00668 /* WORK() are the midpoints and WERR() the semi-width */ 00669 /* Note that the entries in W are unchanged. */ 00670 i__4 = newlst; 00671 for (k = newfst; k <= i__4; ++k) { 00672 fudge = eps * 3. * (d__1 = work[wbegin + k - 00673 1], absMACRO(d__1)); 00674 work[wbegin + k - 1] -= tau; 00675 fudge += eps * 4. * (d__1 = work[wbegin + k - 00676 1], absMACRO(d__1)); 00677 /* Fudge errors */ 00678 werr[wbegin + k - 1] += fudge; 00679 /* Gaps are not fudged. Provided that WERR is small */ 00680 /* when eigenvalues are close, a zero gap indicates */ 00681 /* that a new representation is needed for resolving */ 00682 /* the cluster. A fudge could lead to a wrong decision */ 00683 /* of judging eigenvalues 'separated' which in */ 00684 /* reality are not. This could have a negative impact */ 00685 /* on the orthogonality of the computed eigenvectors. */ 00686 /* L116: */ 00687 } 00688 ++nclus; 00689 k = newcls + (nclus << 1); 00690 iwork[k - 1] = newfst; 00691 iwork[k] = newlst; 00692 } else { 00693 *info = -2; 00694 return 0; 00695 } 00696 } else { 00697 00698 /* Compute eigenvector of singleton */ 00699 00700 iter = 0; 00701 00702 tol = template_blas_log((Treal) in) * 4. * eps; 00703 00704 k = newfst; 00705 windex = wbegin + k - 1; 00706 /* Computing MAX */ 00707 i__4 = windex - 1; 00708 windmn = maxMACRO(i__4,1); 00709 /* Computing MIN */ 00710 i__4 = windex + 1; 00711 windpl = minMACRO(i__4,*m); 00712 lambda = work[windex]; 00713 ++done; 00714 /* Check if eigenvector computation is to be skipped */ 00715 if (windex < *dol || windex > *dou) { 00716 eskip = TRUE_; 00717 goto L125; 00718 } else { 00719 eskip = FALSE_; 00720 } 00721 left = work[windex] - werr[windex]; 00722 right = work[windex] + werr[windex]; 00723 indeig = indexw[windex]; 00724 /* Note that since we compute the eigenpairs for a child, */ 00725 /* all eigenvalue approximations are w.r.t the same shift. */ 00726 /* In this case, the entries in WORK should be used for */ 00727 /* computing the gaps since they exhibit even very small */ 00728 /* differences in the eigenvalues, as opposed to the */ 00729 /* entries in W which might "look" the same. */ 00730 if (k == 1) { 00731 /* In the case RANGE='I' and with not much initial */ 00732 /* accuracy in LAMBDA and VL, the formula */ 00733 /* LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) */ 00734 /* can lead to an overestimation of the left gap and */ 00735 /* thus to inadequately early RQI 'convergence'. */ 00736 /* Prevent this by forcing a small left gap. */ 00737 /* Computing MAX */ 00738 d__1 = absMACRO(left), d__2 = absMACRO(right); 00739 lgap = eps * maxMACRO(d__1,d__2); 00740 } else { 00741 lgap = wgap[windmn]; 00742 } 00743 if (k == im) { 00744 /* In the case RANGE='I' and with not much initial */ 00745 /* accuracy in LAMBDA and VU, the formula */ 00746 /* can lead to an overestimation of the right gap and */ 00747 /* thus to inadequately early RQI 'convergence'. */ 00748 /* Prevent this by forcing a small right gap. */ 00749 /* Computing MAX */ 00750 d__1 = absMACRO(left), d__2 = absMACRO(right); 00751 rgap = eps * maxMACRO(d__1,d__2); 00752 } else { 00753 rgap = wgap[windex]; 00754 } 00755 gap = minMACRO(lgap,rgap); 00756 if (k == 1 || k == im) { 00757 /* The eigenvector support can become wrong */ 00758 /* because significant entries could be cut off due to a */ 00759 /* large GAPTOL parameter in LAR1V. Prevent this. */ 00760 gaptol = 0.; 00761 } else { 00762 gaptol = gap * eps; 00763 } 00764 isupmn = in; 00765 isupmx = 1; 00766 /* Update WGAP so that it holds the minimum gap */ 00767 /* to the left or the right. This is crucial in the */ 00768 /* case where bisection is used to ensure that the */ 00769 /* eigenvalue is refined up to the required precision. */ 00770 /* The correct value is restored afterwards. */ 00771 savgap = wgap[windex]; 00772 wgap[windex] = gap; 00773 /* We want to use the Rayleigh Quotient Correction */ 00774 /* as often as possible since it converges quadratically */ 00775 /* when we are close enough to the desired eigenvalue. */ 00776 /* However, the Rayleigh Quotient can have the wrong sign */ 00777 /* and lead us away from the desired eigenvalue. In this */ 00778 /* case, the best we can do is to use bisection. */ 00779 usedbs = FALSE_; 00780 usedrq = FALSE_; 00781 /* Bisection is initially turned off unless it is forced */ 00782 needbs = ! tryrqc; 00783 L120: 00784 /* Check if bisection should be used to refine eigenvalue */ 00785 if (needbs) { 00786 /* Take the bisection as new iterate */ 00787 usedbs = TRUE_; 00788 itmp1 = iwork[iindr + windex]; 00789 offset = indexw[wbegin] - 1; 00790 d__1 = eps * 2.; 00791 template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin 00792 - 1], &indeig, &indeig, &c_b5, &d__1, & 00793 offset, &work[wbegin], &wgap[wbegin], & 00794 werr[wbegin], &work[indwrk], &iwork[ 00795 iindwk], pivmin, &spdiam, &itmp1, &iinfo); 00796 if (iinfo != 0) { 00797 *info = -3; 00798 return 0; 00799 } 00800 lambda = work[windex]; 00801 /* Reset twist index from inaccurate LAMBDA to */ 00802 /* force computation of true MINGMA */ 00803 iwork[iindr + windex] = 0; 00804 } 00805 /* Given LAMBDA, compute the eigenvector. */ 00806 L__1 = ! usedbs; 00807 template_lapack_lar1v(&in, &c__1, &in, &lambda, &d__[ibegin], &l[ 00808 ibegin], &work[indld + ibegin - 1], &work[ 00809 indlld + ibegin - 1], pivmin, &gaptol, &z__[ 00810 ibegin + windex * z_dim1], &L__1, &negcnt, & 00811 ztz, &mingma, &iwork[iindr + windex], &isuppz[ 00812 (windex << 1) - 1], &nrminv, &resid, &rqcorr, 00813 &work[indwrk]); 00814 if (iter == 0) { 00815 bstres = resid; 00816 bstw = lambda; 00817 } else if (resid < bstres) { 00818 bstres = resid; 00819 bstw = lambda; 00820 } 00821 /* Computing MIN */ 00822 i__4 = isupmn, i__5 = isuppz[(windex << 1) - 1]; 00823 isupmn = minMACRO(i__4,i__5); 00824 /* Computing MAX */ 00825 i__4 = isupmx, i__5 = isuppz[windex * 2]; 00826 isupmx = maxMACRO(i__4,i__5); 00827 ++iter; 00828 /* sin alpha <= |resid|/gap */ 00829 /* Note that both the residual and the gap are */ 00830 /* proportional to the matrix, so ||T|| doesn't play */ 00831 /* a role in the quotient */ 00832 00833 /* Convergence test for Rayleigh-Quotient iteration */ 00834 /* (omitted when Bisection has been used) */ 00835 00836 if (resid > tol * gap && absMACRO(rqcorr) > rqtol * absMACRO( 00837 lambda) && ! usedbs) { 00838 /* We need to check that the RQCORR update doesn't */ 00839 /* move the eigenvalue away from the desired one and */ 00840 /* towards a neighbor. -> protection with bisection */ 00841 if (indeig <= negcnt) { 00842 /* The wanted eigenvalue lies to the left */ 00843 sgndef = -1.; 00844 } else { 00845 /* The wanted eigenvalue lies to the right */ 00846 sgndef = 1.; 00847 } 00848 /* We only use the RQCORR if it improves the */ 00849 /* the iterate reasonably. */ 00850 if (rqcorr * sgndef >= 0. && lambda + rqcorr <= 00851 right && lambda + rqcorr >= left) { 00852 usedrq = TRUE_; 00853 /* Store new midpoint of bisection interval in WORK */ 00854 if (sgndef == 1.) { 00855 /* The current LAMBDA is on the left of the true */ 00856 /* eigenvalue */ 00857 left = lambda; 00858 /* We prefer to assume that the error estimate */ 00859 /* is correct. We could make the interval not */ 00860 /* as a bracket but to be modified if the RQCORR */ 00861 /* chooses to. In this case, the RIGHT side should */ 00862 /* be modified as follows: */ 00863 /* RIGHT = MAX(RIGHT, LAMBDA + RQCORR) */ 00864 } else { 00865 /* The current LAMBDA is on the right of the true */ 00866 /* eigenvalue */ 00867 right = lambda; 00868 /* See comment about assuming the error estimate is */ 00869 /* correct above. */ 00870 /* LEFT = MIN(LEFT, LAMBDA + RQCORR) */ 00871 } 00872 work[windex] = (right + left) * .5; 00873 /* Take RQCORR since it has the correct sign and */ 00874 /* improves the iterate reasonably */ 00875 lambda += rqcorr; 00876 /* Update width of error interval */ 00877 werr[windex] = (right - left) * .5; 00878 } else { 00879 needbs = TRUE_; 00880 } 00881 if (right - left < rqtol * absMACRO(lambda)) { 00882 /* The eigenvalue is computed to bisection accuracy */ 00883 /* compute eigenvector and stop */ 00884 usedbs = TRUE_; 00885 goto L120; 00886 } else if (iter < 10) { 00887 goto L120; 00888 } else if (iter == 10) { 00889 needbs = TRUE_; 00890 goto L120; 00891 } else { 00892 *info = 5; 00893 return 0; 00894 } 00895 } else { 00896 stp2ii = FALSE_; 00897 if (usedrq && usedbs && bstres <= resid) { 00898 lambda = bstw; 00899 stp2ii = TRUE_; 00900 } 00901 if (stp2ii) { 00902 /* improve error angle by second step */ 00903 L__1 = ! usedbs; 00904 template_lapack_lar1v(&in, &c__1, &in, &lambda, &d__[ibegin] 00905 , &l[ibegin], &work[indld + ibegin - 00906 1], &work[indlld + ibegin - 1], 00907 pivmin, &gaptol, &z__[ibegin + windex 00908 * z_dim1], &L__1, &negcnt, &ztz, & 00909 mingma, &iwork[iindr + windex], & 00910 isuppz[(windex << 1) - 1], &nrminv, & 00911 resid, &rqcorr, &work[indwrk]); 00912 } 00913 work[windex] = lambda; 00914 } 00915 00916 /* Compute FP-vector support w.r.t. whole matrix */ 00917 00918 isuppz[(windex << 1) - 1] += oldien; 00919 isuppz[windex * 2] += oldien; 00920 zfrom = isuppz[(windex << 1) - 1]; 00921 zto = isuppz[windex * 2]; 00922 isupmn += oldien; 00923 isupmx += oldien; 00924 /* Ensure vector is ok if support in the RQI has changed */ 00925 if (isupmn < zfrom) { 00926 i__4 = zfrom - 1; 00927 for (ii = isupmn; ii <= i__4; ++ii) { 00928 z__[ii + windex * z_dim1] = 0.; 00929 /* L122: */ 00930 } 00931 } 00932 if (isupmx > zto) { 00933 i__4 = isupmx; 00934 for (ii = zto + 1; ii <= i__4; ++ii) { 00935 z__[ii + windex * z_dim1] = 0.; 00936 /* L123: */ 00937 } 00938 } 00939 i__4 = zto - zfrom + 1; 00940 template_blas_scal(&i__4, &nrminv, &z__[zfrom + windex * z_dim1], 00941 &c__1); 00942 L125: 00943 /* Update W */ 00944 w[windex] = lambda + sigma; 00945 /* Recompute the gaps on the left and right */ 00946 /* But only allow them to become larger and not */ 00947 /* smaller (which can only happen through "bad" */ 00948 /* cancellation and doesn't reflect the theory */ 00949 /* where the initial gaps are underestimated due */ 00950 /* to WERR being too crude.) */ 00951 if (! eskip) { 00952 if (k > 1) { 00953 /* Computing MAX */ 00954 d__1 = wgap[windmn], d__2 = w[windex] - werr[ 00955 windex] - w[windmn] - werr[windmn]; 00956 wgap[windmn] = maxMACRO(d__1,d__2); 00957 } 00958 if (windex < wend) { 00959 /* Computing MAX */ 00960 d__1 = savgap, d__2 = w[windpl] - werr[windpl] 00961 - w[windex] - werr[windex]; 00962 wgap[windex] = maxMACRO(d__1,d__2); 00963 } 00964 } 00965 ++idone; 00966 } 00967 /* here ends the code for the current child */ 00968 00969 L139: 00970 /* Proceed to any remaining child nodes */ 00971 newfst = j + 1; 00972 L140: 00973 ; 00974 } 00975 /* L150: */ 00976 } 00977 ++ndepth; 00978 goto L40; 00979 } 00980 ibegin = iend + 1; 00981 wbegin = wend + 1; 00982 L170: 00983 ; 00984 } 00985 00986 return 0; 00987 00988 /* End of DLARRV */ 00989 00990 } /* dlarrv_ */ 00991 00992 #endif