ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_STEVR_HEADER 00036 #define TEMPLATE_LAPACK_STEVR_HEADER 00037 00038 template<class Treal> 00039 int template_lapack_stevr(const char *jobz, const char *range, const integer *n, 00040 Treal * d__, Treal *e, const Treal *vl, 00041 const Treal *vu, const integer *il, 00042 const integer *iu, const Treal *abstol, 00043 integer *m, Treal *w, 00044 Treal *z__, const integer *ldz, integer *isuppz, 00045 Treal *work, 00046 integer *lwork, integer *iwork, integer *liwork, 00047 integer *info) 00048 { 00049 /* System generated locals */ 00050 integer z_dim1, z_offset, i__1, i__2; 00051 Treal d__1, d__2; 00052 00053 /* Builtin functions */ 00054 00055 /* Local variables */ 00056 integer i__, j, jj; 00057 Treal eps, vll, vuu, tmp1; 00058 integer imax; 00059 Treal rmin, rmax; 00060 logical test; 00061 Treal tnrm; 00062 integer itmp1; 00063 Treal sigma; 00064 char order[1]; 00065 integer lwmin; 00066 logical wantz; 00067 logical alleig, indeig; 00068 integer iscale, ieeeok, indibl, indifl; 00069 logical valeig; 00070 Treal safmin; 00071 Treal bignum; 00072 integer indisp; 00073 integer indiwo; 00074 integer liwmin; 00075 logical tryrac; 00076 integer nsplit; 00077 Treal smlnum; 00078 logical lquery; 00079 00080 00081 /* -- LAPACK driver routine (version 3.2) -- */ 00082 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ 00083 /* November 2006 */ 00084 00085 /* .. Scalar Arguments .. */ 00086 /* .. */ 00087 /* .. Array Arguments .. */ 00088 /* .. */ 00089 00090 /* Purpose */ 00091 /* ======= */ 00092 00093 /* DSTEVR computes selected eigenvalues and, optionally, eigenvectors */ 00094 /* of a real symmetric tridiagonal matrix T. Eigenvalues and */ 00095 /* eigenvectors can be selected by specifying either a range of values */ 00096 /* or a range of indices for the desired eigenvalues. */ 00097 00098 /* Whenever possible, DSTEVR calls DSTEMR to compute the */ 00099 /* eigenspectrum using Relatively Robust Representations. DSTEMR */ 00100 /* computes eigenvalues by the dqds algorithm, while orthogonal */ 00101 /* eigenvectors are computed from various "good" L D L^T representations */ 00102 /* (also known as Relatively Robust Representations). Gram-Schmidt */ 00103 /* orthogonalization is avoided as far as possible. More specifically, */ 00104 /* the various steps of the algorithm are as follows. For the i-th */ 00105 /* unreduced block of T, */ 00106 /* (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T */ 00107 /* is a relatively robust representation, */ 00108 /* (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high */ 00109 /* relative accuracy by the dqds algorithm, */ 00110 /* (c) If there is a cluster of close eigenvalues, "choose" sigma_i */ 00111 /* close to the cluster, and go to step (a), */ 00112 /* (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T, */ 00113 /* compute the corresponding eigenvector by forming a */ 00114 /* rank-revealing twisted factorization. */ 00115 /* The desired accuracy of the output can be specified by the input */ 00116 /* parameter ABSTOL. */ 00117 00118 /* For more details, see "A new O(n^2) algorithm for the symmetric */ 00119 /* tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon, */ 00120 /* Computer Science Division Technical Report No. UCB//CSD-97-971, */ 00121 /* UC Berkeley, May 1997. */ 00122 00123 00124 /* Note 1 : DSTEVR calls DSTEMR when the full spectrum is requested */ 00125 /* on machines which conform to the ieee-754 floating point standard. */ 00126 /* DSTEVR calls DSTEBZ and DSTEIN on non-ieee machines and */ 00127 /* when partial spectrum requests are made. */ 00128 00129 /* Normal execution of DSTEMR may create NaNs and infinities and */ 00130 /* hence may abort due to a floating point exception in environments */ 00131 /* which do not handle NaNs and infinities in the ieee standard default */ 00132 /* manner. */ 00133 00134 /* Arguments */ 00135 /* ========= */ 00136 00137 /* JOBZ (input) CHARACTER*1 */ 00138 /* = 'N': Compute eigenvalues only; */ 00139 /* = 'V': Compute eigenvalues and eigenvectors. */ 00140 00141 /* RANGE (input) CHARACTER*1 */ 00142 /* = 'A': all eigenvalues will be found. */ 00143 /* = 'V': all eigenvalues in the half-open interval (VL,VU] */ 00144 /* will be found. */ 00145 /* = 'I': the IL-th through IU-th eigenvalues will be found. */ 00146 /* ********* For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and */ 00147 /* ********* DSTEIN are called */ 00148 00149 /* N (input) INTEGER */ 00150 /* The order of the matrix. N >= 0. */ 00151 00152 /* D (input/output) DOUBLE PRECISION array, dimension (N) */ 00153 /* On entry, the n diagonal elements of the tridiagonal matrix */ 00154 /* A. */ 00155 /* On exit, D may be multiplied by a constant factor chosen */ 00156 /* to avoid over/underflow in computing the eigenvalues. */ 00157 00158 /* E (input/output) DOUBLE PRECISION array, dimension (max(1,N-1)) */ 00159 /* On entry, the (n-1) subdiagonal elements of the tridiagonal */ 00160 /* matrix A in elements 1 to N-1 of E. */ 00161 /* On exit, E may be multiplied by a constant factor chosen */ 00162 /* to avoid over/underflow in computing the eigenvalues. */ 00163 00164 /* VL (input) DOUBLE PRECISION */ 00165 /* VU (input) DOUBLE PRECISION */ 00166 /* If RANGE='V', the lower and upper bounds of the interval to */ 00167 /* be searched for eigenvalues. VL < VU. */ 00168 /* Not referenced if RANGE = 'A' or 'I'. */ 00169 00170 /* IL (input) INTEGER */ 00171 /* IU (input) INTEGER */ 00172 /* If RANGE='I', the indices (in ascending order) of the */ 00173 /* smallest and largest eigenvalues to be returned. */ 00174 /* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ 00175 /* Not referenced if RANGE = 'A' or 'V'. */ 00176 00177 /* ABSTOL (input) DOUBLE PRECISION */ 00178 /* The absolute error tolerance for the eigenvalues. */ 00179 /* An approximate eigenvalue is accepted as converged */ 00180 /* when it is determined to lie in an interval [a,b] */ 00181 /* of width less than or equal to */ 00182 00183 /* ABSTOL + EPS * max( |a|,|b| ) , */ 00184 00185 /* where EPS is the machine precision. If ABSTOL is less than */ 00186 /* or equal to zero, then EPS*|T| will be used in its place, */ 00187 /* where |T| is the 1-norm of the tridiagonal matrix obtained */ 00188 /* by reducing A to tridiagonal form. */ 00189 00190 /* See "Computing Small Singular Values of Bidiagonal Matrices */ 00191 /* with Guaranteed High Relative Accuracy," by Demmel and */ 00192 /* Kahan, LAPACK Working Note #3. */ 00193 00194 /* If high relative accuracy is important, set ABSTOL to */ 00195 /* DLAMCH( 'Safe minimum' ). Doing so will guarantee that */ 00196 /* eigenvalues are computed to high relative accuracy when */ 00197 /* possible in future releases. The current code does not */ 00198 /* make any guarantees about high relative accuracy, but */ 00199 /* future releases will. See J. Barlow and J. Demmel, */ 00200 /* "Computing Accurate Eigensystems of Scaled Diagonally */ 00201 /* Dominant Matrices", LAPACK Working Note #7, for a discussion */ 00202 /* of which matrices define their eigenvalues to high relative */ 00203 /* accuracy. */ 00204 00205 /* M (output) INTEGER */ 00206 /* The total number of eigenvalues found. 0 <= M <= N. */ 00207 /* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ 00208 00209 /* W (output) DOUBLE PRECISION array, dimension (N) */ 00210 /* The first M elements contain the selected eigenvalues in */ 00211 /* ascending order. */ 00212 00213 /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */ 00214 /* If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ 00215 /* contain the orthonormal eigenvectors of the matrix A */ 00216 /* corresponding to the selected eigenvalues, with the i-th */ 00217 /* column of Z holding the eigenvector associated with W(i). */ 00218 /* Note: the user must ensure that at least max(1,M) columns are */ 00219 /* supplied in the array Z; if RANGE = 'V', the exact value of M */ 00220 /* is not known in advance and an upper bound must be used. */ 00221 00222 /* LDZ (input) INTEGER */ 00223 /* The leading dimension of the array Z. LDZ >= 1, and if */ 00224 /* JOBZ = 'V', LDZ >= max(1,N). */ 00225 00226 /* ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) */ 00227 /* The support of the eigenvectors in Z, i.e., the indices */ 00228 /* indicating the nonzero elements in Z. The i-th eigenvector */ 00229 /* is nonzero only in elements ISUPPZ( 2*i-1 ) through */ 00230 /* ISUPPZ( 2*i ). */ 00231 /* ********* Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1 */ 00232 00233 /* WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */ 00234 /* On exit, if INFO = 0, WORK(1) returns the optimal (and */ 00235 /* minimal) LWORK. */ 00236 00237 /* LWORK (input) INTEGER */ 00238 /* The dimension of the array WORK. LWORK >= max(1,20*N). */ 00239 00240 /* If LWORK = -1, then a workspace query is assumed; the routine */ 00241 /* only calculates the optimal sizes of the WORK and IWORK */ 00242 /* arrays, returns these values as the first entries of the WORK */ 00243 /* and IWORK arrays, and no error message related to LWORK or */ 00244 /* LIWORK is issued by XERBLA. */ 00245 00246 /* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ 00247 /* On exit, if INFO = 0, IWORK(1) returns the optimal (and */ 00248 /* minimal) LIWORK. */ 00249 00250 /* LIWORK (input) INTEGER */ 00251 /* The dimension of the array IWORK. LIWORK >= max(1,10*N). */ 00252 00253 /* If LIWORK = -1, then a workspace query is assumed; the */ 00254 /* routine only calculates the optimal sizes of the WORK and */ 00255 /* IWORK arrays, returns these values as the first entries of */ 00256 /* the WORK and IWORK arrays, and no error message related to */ 00257 /* LWORK or LIWORK is issued by XERBLA. */ 00258 00259 /* INFO (output) INTEGER */ 00260 /* = 0: successful exit */ 00261 /* < 0: if INFO = -i, the i-th argument had an illegal value */ 00262 /* > 0: Internal error */ 00263 00264 /* Further Details */ 00265 /* =============== */ 00266 00267 /* Based on contributions by */ 00268 /* Inderjit Dhillon, IBM Almaden, USA */ 00269 /* Osni Marques, LBNL/NERSC, USA */ 00270 /* Ken Stanley, Computer Science Division, University of */ 00271 /* California at Berkeley, USA */ 00272 00273 /* ===================================================================== */ 00274 00275 /* .. Parameters .. */ 00276 /* .. */ 00277 /* .. Local Scalars .. */ 00278 /* .. */ 00279 /* .. External Functions .. */ 00280 /* .. */ 00281 /* .. External Subroutines .. */ 00282 /* .. */ 00283 /* .. Intrinsic Functions .. */ 00284 /* .. */ 00285 /* .. Executable Statements .. */ 00286 00287 00288 /* Test the input parameters. */ 00289 00290 /* Parameter adjustments */ 00291 /* Table of constant values */ 00292 00293 integer c__10 = 10; 00294 integer c__1 = 1; 00295 integer c__2 = 2; 00296 integer c__3 = 3; 00297 integer c__4 = 4; 00298 00299 --d__; 00300 --e; 00301 --w; 00302 z_dim1 = *ldz; 00303 z_offset = 1 + z_dim1; 00304 z__ -= z_offset; 00305 --isuppz; 00306 --work; 00307 --iwork; 00308 00309 /* Function Body */ 00310 ieeeok = template_lapack_ilaenv(&c__10, "DSTEVR", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen)6, (ftnlen)1); 00311 00312 wantz = template_blas_lsame(jobz, "V"); 00313 alleig = template_blas_lsame(range, "A"); 00314 valeig = template_blas_lsame(range, "V"); 00315 indeig = template_blas_lsame(range, "I"); 00316 00317 lquery = *lwork == -1 || *liwork == -1; 00318 /* Computing MAX */ 00319 i__1 = 1, i__2 = *n * 20; 00320 lwmin = maxMACRO(i__1,i__2); 00321 /* Computing MAX */ 00322 i__1 = 1, i__2 = *n * 10; 00323 liwmin = maxMACRO(i__1,i__2); 00324 00325 00326 *info = 0; 00327 if (! (wantz || template_blas_lsame(jobz, "N"))) { 00328 *info = -1; 00329 } else if (! (alleig || valeig || indeig)) { 00330 *info = -2; 00331 } else if (*n < 0) { 00332 *info = -3; 00333 } else { 00334 if (valeig) { 00335 if (*n > 0 && *vu <= *vl) { 00336 *info = -7; 00337 } 00338 } else if (indeig) { 00339 if (*il < 1 || *il > maxMACRO(1,*n)) { 00340 *info = -8; 00341 } else if (*iu < minMACRO(*n,*il) || *iu > *n) { 00342 *info = -9; 00343 } 00344 } 00345 } 00346 if (*info == 0) { 00347 if (*ldz < 1 || ( wantz && *ldz < *n ) ) { 00348 *info = -14; 00349 } 00350 } 00351 00352 if (*info == 0) { 00353 work[1] = (Treal) lwmin; 00354 iwork[1] = liwmin; 00355 00356 if (*lwork < lwmin && ! lquery) { 00357 *info = -17; 00358 } else if (*liwork < liwmin && ! lquery) { 00359 *info = -19; 00360 } 00361 } 00362 00363 if (*info != 0) { 00364 i__1 = -(*info); 00365 template_blas_erbla("STEVR", &i__1); 00366 return 0; 00367 } else if (lquery) { 00368 return 0; 00369 } 00370 00371 /* Quick return if possible */ 00372 00373 *m = 0; 00374 if (*n == 0) { 00375 return 0; 00376 } 00377 00378 if (*n == 1) { 00379 if (alleig || indeig) { 00380 *m = 1; 00381 w[1] = d__[1]; 00382 } else { 00383 if (*vl < d__[1] && *vu >= d__[1]) { 00384 *m = 1; 00385 w[1] = d__[1]; 00386 } 00387 } 00388 if (wantz) { 00389 z__[z_dim1 + 1] = 1.; 00390 } 00391 return 0; 00392 } 00393 00394 /* Get machine constants. */ 00395 00396 safmin = template_lapack_lamch("Safe minimum", (Treal)0); 00397 eps = template_lapack_lamch("Precision", (Treal)0); 00398 smlnum = safmin / eps; 00399 bignum = 1. / smlnum; 00400 rmin = template_blas_sqrt(smlnum); 00401 /* Computing MIN */ 00402 d__1 = template_blas_sqrt(bignum), d__2 = 1. / template_blas_sqrt(template_blas_sqrt(safmin)); 00403 rmax = minMACRO(d__1,d__2); 00404 00405 00406 /* Scale matrix to allowable range, if necessary. */ 00407 00408 iscale = 0; 00409 vll = *vl; 00410 vuu = *vu; 00411 00412 tnrm = template_lapack_lanst("M", n, &d__[1], &e[1]); 00413 if (tnrm > 0. && tnrm < rmin) { 00414 iscale = 1; 00415 sigma = rmin / tnrm; 00416 } else if (tnrm > rmax) { 00417 iscale = 1; 00418 sigma = rmax / tnrm; 00419 } 00420 if (iscale == 1) { 00421 template_blas_scal(n, &sigma, &d__[1], &c__1); 00422 i__1 = *n - 1; 00423 template_blas_scal(&i__1, &sigma, &e[1], &c__1); 00424 if (valeig) { 00425 vll = *vl * sigma; 00426 vuu = *vu * sigma; 00427 } 00428 } 00429 /* Initialize indices into workspaces. Note: These indices are used only */ 00430 /* if DSTERF or DSTEMR fail. */ 00431 /* IWORK(INDIBL:INDIBL+M-1) corresponds to IBLOCK in DSTEBZ and */ 00432 /* stores the block indices of each of the M<=N eigenvalues. */ 00433 indibl = 1; 00434 /* IWORK(INDISP:INDISP+NSPLIT-1) corresponds to ISPLIT in DSTEBZ and */ 00435 /* stores the starting and finishing indices of each block. */ 00436 indisp = indibl + *n; 00437 /* IWORK(INDIFL:INDIFL+N-1) stores the indices of eigenvectors */ 00438 /* that corresponding to eigenvectors that fail to converge in */ 00439 /* DSTEIN. This information is discarded; if any fail, the driver */ 00440 /* returns INFO > 0. */ 00441 indifl = indisp + *n; 00442 /* INDIWO is the offset of the remaining integer workspace. */ 00443 indiwo = indisp + *n; 00444 00445 /* If all eigenvalues are desired, then */ 00446 /* call DSTERF or DSTEMR. If this fails for some eigenvalue, then */ 00447 /* try DSTEBZ. */ 00448 00449 00450 test = FALSE_; 00451 if (indeig) { 00452 if (*il == 1 && *iu == *n) { 00453 test = TRUE_; 00454 } 00455 } 00456 if ((alleig || test) && ieeeok == 1) { 00457 i__1 = *n - 1; 00458 template_blas_copy(&i__1, &e[1], &c__1, &work[1], &c__1); 00459 if (! wantz) { 00460 template_blas_copy(n, &d__[1], &c__1, &w[1], &c__1); 00461 template_lapack_sterf(n, &w[1], &work[1], info); 00462 } else { 00463 template_blas_copy(n, &d__[1], &c__1, &work[*n + 1], &c__1); 00464 if (*abstol <= *n * 2. * eps) { 00465 tryrac = TRUE_; 00466 } else { 00467 tryrac = FALSE_; 00468 } 00469 i__1 = *lwork - (*n << 1); 00470 template_lapack_stemr(jobz, "A", n, &work[*n + 1], &work[1], vl, vu, il, iu, m, 00471 &w[1], &z__[z_offset], ldz, n, &isuppz[1], &tryrac, &work[ 00472 (*n << 1) + 1], &i__1, &iwork[1], liwork, info); 00473 00474 } 00475 if (*info == 0) { 00476 *m = *n; 00477 goto L10; 00478 } 00479 *info = 0; 00480 } 00481 00482 /* Otherwise, call DSTEBZ and, if eigenvectors are desired, DSTEIN. */ 00483 00484 if (wantz) { 00485 *(unsigned char *)order = 'B'; 00486 } else { 00487 *(unsigned char *)order = 'E'; 00488 } 00489 template_lapack_stebz(range, order, n, &vll, &vuu, il, iu, abstol, &d__[1], &e[1], m, & 00490 nsplit, &w[1], &iwork[indibl], &iwork[indisp], &work[1], &iwork[ 00491 indiwo], info); 00492 00493 if (wantz) { 00494 template_lapack_stein(n, &d__[1], &e[1], m, &w[1], &iwork[indibl], &iwork[indisp], & 00495 z__[z_offset], ldz, &work[1], &iwork[indiwo], &iwork[indifl], 00496 info); 00497 } 00498 00499 /* If matrix was scaled, then rescale eigenvalues appropriately. */ 00500 00501 L10: 00502 if (iscale == 1) { 00503 if (*info == 0) { 00504 imax = *m; 00505 } else { 00506 imax = *info - 1; 00507 } 00508 d__1 = 1. / sigma; 00509 template_blas_scal(&imax, &d__1, &w[1], &c__1); 00510 } 00511 00512 /* If eigenvalues are not in order, then sort them, along with */ 00513 /* eigenvectors. */ 00514 00515 if (wantz) { 00516 i__1 = *m - 1; 00517 for (j = 1; j <= i__1; ++j) { 00518 i__ = 0; 00519 tmp1 = w[j]; 00520 i__2 = *m; 00521 for (jj = j + 1; jj <= i__2; ++jj) { 00522 if (w[jj] < tmp1) { 00523 i__ = jj; 00524 tmp1 = w[jj]; 00525 } 00526 /* L20: */ 00527 } 00528 00529 if (i__ != 0) { 00530 itmp1 = iwork[i__]; 00531 w[i__] = w[j]; 00532 iwork[i__] = iwork[j]; 00533 w[j] = tmp1; 00534 iwork[j] = itmp1; 00535 template_blas_swap(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[j * z_dim1 + 1], 00536 &c__1); 00537 } 00538 /* L30: */ 00539 } 00540 } 00541 00542 /* Causes problems with tests 19 & 20: */ 00543 /* IF (wantz .and. INDEIG ) Z( 1,1) = Z(1,1) / 1.002 + .002 */ 00544 00545 00546 work[1] = (Treal) lwmin; 00547 iwork[1] = liwmin; 00548 return 0; 00549 00550 /* End of DSTEVR */ 00551 00552 } /* dstevr_ */ 00553 00554 #endif 00555