ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_GGBAL_HEADER 00036 #define TEMPLATE_LAPACK_GGBAL_HEADER 00037 00038 00039 template<class Treal> 00040 int template_lapack_ggbal(const char *job, const integer *n, Treal *a, const integer * 00041 lda, Treal *b, const integer *ldb, integer *ilo, integer *ihi, 00042 Treal *lscale, Treal *rscale, Treal *work, integer * 00043 info) 00044 { 00045 /* -- LAPACK routine (version 3.0) -- 00046 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 00047 Courant Institute, Argonne National Lab, and Rice University 00048 September 30, 1994 00049 00050 00051 Purpose 00052 ======= 00053 00054 DGGBAL balances a pair of general real matrices (A,B). This 00055 involves, first, permuting A and B by similarity transformations to 00056 isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N 00057 elements on the diagonal; and second, applying a diagonal similarity 00058 transformation to rows and columns ILO to IHI to make the rows 00059 and columns as close in norm as possible. Both steps are optional. 00060 00061 Balancing may reduce the 1-norm of the matrices, and improve the 00062 accuracy of the computed eigenvalues and/or eigenvectors in the 00063 generalized eigenvalue problem A*x = lambda*B*x. 00064 00065 Arguments 00066 ========= 00067 00068 JOB (input) CHARACTER*1 00069 Specifies the operations to be performed on A and B: 00070 = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0 00071 and RSCALE(I) = 1.0 for i = 1,...,N. 00072 = 'P': permute only; 00073 = 'S': scale only; 00074 = 'B': both permute and scale. 00075 00076 N (input) INTEGER 00077 The order of the matrices A and B. N >= 0. 00078 00079 A (input/output) DOUBLE PRECISION array, dimension (LDA,N) 00080 On entry, the input matrix A. 00081 On exit, A is overwritten by the balanced matrix. 00082 If JOB = 'N', A is not referenced. 00083 00084 LDA (input) INTEGER 00085 The leading dimension of the array A. LDA >= max(1,N). 00086 00087 B (input/output) DOUBLE PRECISION array, dimension (LDB,N) 00088 On entry, the input matrix B. 00089 On exit, B is overwritten by the balanced matrix. 00090 If JOB = 'N', B is not referenced. 00091 00092 LDB (input) INTEGER 00093 The leading dimension of the array B. LDB >= max(1,N). 00094 00095 ILO (output) INTEGER 00096 IHI (output) INTEGER 00097 ILO and IHI are set to integers such that on exit 00098 A(i,j) = 0 and B(i,j) = 0 if i > j and 00099 j = 1,...,ILO-1 or i = IHI+1,...,N. 00100 If JOB = 'N' or 'S', ILO = 1 and IHI = N. 00101 00102 LSCALE (output) DOUBLE PRECISION array, dimension (N) 00103 Details of the permutations and scaling factors applied 00104 to the left side of A and B. If P(j) is the index of the 00105 row interchanged with row j, and D(j) 00106 is the scaling factor applied to row j, then 00107 LSCALE(j) = P(j) for J = 1,...,ILO-1 00108 = D(j) for J = ILO,...,IHI 00109 = P(j) for J = IHI+1,...,N. 00110 The order in which the interchanges are made is N to IHI+1, 00111 then 1 to ILO-1. 00112 00113 RSCALE (output) DOUBLE PRECISION array, dimension (N) 00114 Details of the permutations and scaling factors applied 00115 to the right side of A and B. If P(j) is the index of the 00116 column interchanged with column j, and D(j) 00117 is the scaling factor applied to column j, then 00118 LSCALE(j) = P(j) for J = 1,...,ILO-1 00119 = D(j) for J = ILO,...,IHI 00120 = P(j) for J = IHI+1,...,N. 00121 The order in which the interchanges are made is N to IHI+1, 00122 then 1 to ILO-1. 00123 00124 WORK (workspace) DOUBLE PRECISION array, dimension (6*N) 00125 00126 INFO (output) INTEGER 00127 = 0: successful exit 00128 < 0: if INFO = -i, the i-th argument had an illegal value. 00129 00130 Further Details 00131 =============== 00132 00133 See R.C. WARD, Balancing the generalized eigenvalue problem, 00134 SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. 00135 00136 ===================================================================== 00137 00138 00139 Test the input parameters 00140 00141 Parameter adjustments */ 00142 /* Table of constant values */ 00143 integer c__1 = 1; 00144 Treal c_b34 = 10.; 00145 Treal c_b70 = .5; 00146 00147 /* System generated locals */ 00148 integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; 00149 Treal d__1, d__2, d__3; 00150 /* Local variables */ 00151 integer lcab; 00152 Treal beta, coef; 00153 integer irab, lrab; 00154 Treal basl, cmax; 00155 Treal coef2, coef5; 00156 integer i__, j, k, l, m; 00157 Treal gamma, t, alpha; 00158 Treal sfmin, sfmax; 00159 integer iflow; 00160 integer kount, jc; 00161 Treal ta, tb, tc; 00162 integer ir, it; 00163 Treal ew; 00164 integer nr; 00165 Treal pgamma; 00166 integer lsfmin, lsfmax, ip1, jp1, lm1; 00167 Treal cab, rab, ewc, cor, sum; 00168 integer nrp2, icab; 00169 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 00170 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] 00171 00172 00173 a_dim1 = *lda; 00174 a_offset = 1 + a_dim1 * 1; 00175 a -= a_offset; 00176 b_dim1 = *ldb; 00177 b_offset = 1 + b_dim1 * 1; 00178 b -= b_offset; 00179 --lscale; 00180 --rscale; 00181 --work; 00182 00183 /* Initialization added by Elias to get rid of compiler warnings. */ 00184 pgamma = 0; 00185 /* Function Body */ 00186 *info = 0; 00187 if (! template_blas_lsame(job, "N") && ! template_blas_lsame(job, "P") && ! template_blas_lsame(job, "S") 00188 && ! template_blas_lsame(job, "B")) { 00189 *info = -1; 00190 } else if (*n < 0) { 00191 *info = -2; 00192 } else if (*lda < maxMACRO(1,*n)) { 00193 *info = -4; 00194 } else if (*ldb < maxMACRO(1,*n)) { 00195 *info = -5; 00196 } 00197 if (*info != 0) { 00198 i__1 = -(*info); 00199 template_blas_erbla("GGBAL ", &i__1); 00200 return 0; 00201 } 00202 00203 k = 1; 00204 l = *n; 00205 00206 /* Quick return if possible */ 00207 00208 if (*n == 0) { 00209 return 0; 00210 } 00211 00212 if (template_blas_lsame(job, "N")) { 00213 *ilo = 1; 00214 *ihi = *n; 00215 i__1 = *n; 00216 for (i__ = 1; i__ <= i__1; ++i__) { 00217 lscale[i__] = 1.; 00218 rscale[i__] = 1.; 00219 /* L10: */ 00220 } 00221 return 0; 00222 } 00223 00224 if (k == l) { 00225 *ilo = 1; 00226 *ihi = 1; 00227 lscale[1] = 1.; 00228 rscale[1] = 1.; 00229 return 0; 00230 } 00231 00232 if (template_blas_lsame(job, "S")) { 00233 goto L190; 00234 } 00235 00236 goto L30; 00237 00238 /* Permute the matrices A and B to isolate the eigenvalues. 00239 00240 Find row with one nonzero in columns 1 through L */ 00241 00242 L20: 00243 l = lm1; 00244 if (l != 1) { 00245 goto L30; 00246 } 00247 00248 rscale[1] = 1.; 00249 lscale[1] = 1.; 00250 goto L190; 00251 00252 L30: 00253 lm1 = l - 1; 00254 for (i__ = l; i__ >= 1; --i__) { 00255 i__1 = lm1; 00256 for (j = 1; j <= i__1; ++j) { 00257 jp1 = j + 1; 00258 if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) { 00259 goto L50; 00260 } 00261 /* L40: */ 00262 } 00263 j = l; 00264 goto L70; 00265 00266 L50: 00267 i__1 = l; 00268 for (j = jp1; j <= i__1; ++j) { 00269 if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) { 00270 goto L80; 00271 } 00272 /* L60: */ 00273 } 00274 j = jp1 - 1; 00275 00276 L70: 00277 m = l; 00278 iflow = 1; 00279 goto L160; 00280 L80: 00281 ; 00282 } 00283 goto L100; 00284 00285 /* Find column with one nonzero in rows K through N */ 00286 00287 L90: 00288 ++k; 00289 00290 L100: 00291 i__1 = l; 00292 for (j = k; j <= i__1; ++j) { 00293 i__2 = lm1; 00294 for (i__ = k; i__ <= i__2; ++i__) { 00295 ip1 = i__ + 1; 00296 if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) { 00297 goto L120; 00298 } 00299 /* L110: */ 00300 } 00301 i__ = l; 00302 goto L140; 00303 L120: 00304 i__2 = l; 00305 for (i__ = ip1; i__ <= i__2; ++i__) { 00306 if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) { 00307 goto L150; 00308 } 00309 /* L130: */ 00310 } 00311 i__ = ip1 - 1; 00312 L140: 00313 m = k; 00314 iflow = 2; 00315 goto L160; 00316 L150: 00317 ; 00318 } 00319 goto L190; 00320 00321 /* Permute rows M and I */ 00322 00323 L160: 00324 lscale[m] = (Treal) i__; 00325 if (i__ == m) { 00326 goto L170; 00327 } 00328 i__1 = *n - k + 1; 00329 template_blas_swap(&i__1, &a_ref(i__, k), lda, &a_ref(m, k), lda); 00330 i__1 = *n - k + 1; 00331 template_blas_swap(&i__1, &b_ref(i__, k), ldb, &b_ref(m, k), ldb); 00332 00333 /* Permute columns M and J */ 00334 00335 L170: 00336 rscale[m] = (Treal) j; 00337 if (j == m) { 00338 goto L180; 00339 } 00340 template_blas_swap(&l, &a_ref(1, j), &c__1, &a_ref(1, m), &c__1); 00341 template_blas_swap(&l, &b_ref(1, j), &c__1, &b_ref(1, m), &c__1); 00342 00343 L180: 00344 switch (iflow) { 00345 case 1: goto L20; 00346 case 2: goto L90; 00347 } 00348 00349 L190: 00350 *ilo = k; 00351 *ihi = l; 00352 00353 if (*ilo == *ihi) { 00354 return 0; 00355 } 00356 00357 if (template_blas_lsame(job, "P")) { 00358 return 0; 00359 } 00360 00361 /* Balance the submatrix in rows ILO to IHI. */ 00362 00363 nr = *ihi - *ilo + 1; 00364 i__1 = *ihi; 00365 for (i__ = *ilo; i__ <= i__1; ++i__) { 00366 rscale[i__] = 0.; 00367 lscale[i__] = 0.; 00368 00369 work[i__] = 0.; 00370 work[i__ + *n] = 0.; 00371 work[i__ + (*n << 1)] = 0.; 00372 work[i__ + *n * 3] = 0.; 00373 work[i__ + (*n << 2)] = 0.; 00374 work[i__ + *n * 5] = 0.; 00375 /* L200: */ 00376 } 00377 00378 /* Compute right side vector in resulting linear equations */ 00379 00380 basl = template_blas_lg10(&c_b34); 00381 i__1 = *ihi; 00382 for (i__ = *ilo; i__ <= i__1; ++i__) { 00383 i__2 = *ihi; 00384 for (j = *ilo; j <= i__2; ++j) { 00385 tb = b_ref(i__, j); 00386 ta = a_ref(i__, j); 00387 if (ta == 0.) { 00388 goto L210; 00389 } 00390 d__1 = absMACRO(ta); 00391 ta = template_blas_lg10(&d__1) / basl; 00392 L210: 00393 if (tb == 0.) { 00394 goto L220; 00395 } 00396 d__1 = absMACRO(tb); 00397 tb = template_blas_lg10(&d__1) / basl; 00398 L220: 00399 work[i__ + (*n << 2)] = work[i__ + (*n << 2)] - ta - tb; 00400 work[j + *n * 5] = work[j + *n * 5] - ta - tb; 00401 /* L230: */ 00402 } 00403 /* L240: */ 00404 } 00405 00406 coef = 1. / (Treal) (nr << 1); 00407 coef2 = coef * coef; 00408 coef5 = coef2 * .5; 00409 nrp2 = nr + 2; 00410 beta = 0.; 00411 it = 1; 00412 00413 /* Start generalized conjugate gradient iteration */ 00414 00415 L250: 00416 00417 gamma = template_blas_dot(&nr, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + (*n << 2)] 00418 , &c__1) + template_blas_dot(&nr, &work[*ilo + *n * 5], &c__1, &work[*ilo + * 00419 n * 5], &c__1); 00420 00421 ew = 0.; 00422 ewc = 0.; 00423 i__1 = *ihi; 00424 for (i__ = *ilo; i__ <= i__1; ++i__) { 00425 ew += work[i__ + (*n << 2)]; 00426 ewc += work[i__ + *n * 5]; 00427 /* L260: */ 00428 } 00429 00430 /* Computing 2nd power */ 00431 d__1 = ew; 00432 /* Computing 2nd power */ 00433 d__2 = ewc; 00434 /* Computing 2nd power */ 00435 d__3 = ew - ewc; 00436 gamma = coef * gamma - coef2 * (d__1 * d__1 + d__2 * d__2) - coef5 * ( 00437 d__3 * d__3); 00438 if (gamma == 0.) { 00439 goto L350; 00440 } 00441 if (it != 1) { 00442 beta = gamma / pgamma; 00443 } 00444 t = coef5 * (ewc - ew * 3.); 00445 tc = coef5 * (ew - ewc * 3.); 00446 00447 template_blas_scal(&nr, &beta, &work[*ilo], &c__1); 00448 template_blas_scal(&nr, &beta, &work[*ilo + *n], &c__1); 00449 00450 template_blas_axpy(&nr, &coef, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + *n], & 00451 c__1); 00452 template_blas_axpy(&nr, &coef, &work[*ilo + *n * 5], &c__1, &work[*ilo], &c__1); 00453 00454 i__1 = *ihi; 00455 for (i__ = *ilo; i__ <= i__1; ++i__) { 00456 work[i__] += tc; 00457 work[i__ + *n] += t; 00458 /* L270: */ 00459 } 00460 00461 /* Apply matrix to vector */ 00462 00463 i__1 = *ihi; 00464 for (i__ = *ilo; i__ <= i__1; ++i__) { 00465 kount = 0; 00466 sum = 0.; 00467 i__2 = *ihi; 00468 for (j = *ilo; j <= i__2; ++j) { 00469 if (a_ref(i__, j) == 0.) { 00470 goto L280; 00471 } 00472 ++kount; 00473 sum += work[j]; 00474 L280: 00475 if (b_ref(i__, j) == 0.) { 00476 goto L290; 00477 } 00478 ++kount; 00479 sum += work[j]; 00480 L290: 00481 ; 00482 } 00483 work[i__ + (*n << 1)] = (Treal) kount * work[i__ + *n] + sum; 00484 /* L300: */ 00485 } 00486 00487 i__1 = *ihi; 00488 for (j = *ilo; j <= i__1; ++j) { 00489 kount = 0; 00490 sum = 0.; 00491 i__2 = *ihi; 00492 for (i__ = *ilo; i__ <= i__2; ++i__) { 00493 if (a_ref(i__, j) == 0.) { 00494 goto L310; 00495 } 00496 ++kount; 00497 sum += work[i__ + *n]; 00498 L310: 00499 if (b_ref(i__, j) == 0.) { 00500 goto L320; 00501 } 00502 ++kount; 00503 sum += work[i__ + *n]; 00504 L320: 00505 ; 00506 } 00507 work[j + *n * 3] = (Treal) kount * work[j] + sum; 00508 /* L330: */ 00509 } 00510 00511 sum = template_blas_dot(&nr, &work[*ilo + *n], &c__1, &work[*ilo + (*n << 1)], &c__1) 00512 + template_blas_dot(&nr, &work[*ilo], &c__1, &work[*ilo + *n * 3], &c__1); 00513 alpha = gamma / sum; 00514 00515 /* Determine correction to current iteration */ 00516 00517 cmax = 0.; 00518 i__1 = *ihi; 00519 for (i__ = *ilo; i__ <= i__1; ++i__) { 00520 cor = alpha * work[i__ + *n]; 00521 if (absMACRO(cor) > cmax) { 00522 cmax = absMACRO(cor); 00523 } 00524 lscale[i__] += cor; 00525 cor = alpha * work[i__]; 00526 if (absMACRO(cor) > cmax) { 00527 cmax = absMACRO(cor); 00528 } 00529 rscale[i__] += cor; 00530 /* L340: */ 00531 } 00532 if (cmax < .5) { 00533 goto L350; 00534 } 00535 00536 d__1 = -alpha; 00537 template_blas_axpy(&nr, &d__1, &work[*ilo + (*n << 1)], &c__1, &work[*ilo + (*n << 2)] 00538 , &c__1); 00539 d__1 = -alpha; 00540 template_blas_axpy(&nr, &d__1, &work[*ilo + *n * 3], &c__1, &work[*ilo + *n * 5], & 00541 c__1); 00542 00543 pgamma = gamma; 00544 ++it; 00545 if (it <= nrp2) { 00546 goto L250; 00547 } 00548 00549 /* End generalized conjugate gradient iteration */ 00550 00551 L350: 00552 sfmin = template_lapack_lamch("S", (Treal)0); 00553 sfmax = 1. / sfmin; 00554 lsfmin = (integer) (template_blas_lg10(&sfmin) / basl + 1.); 00555 lsfmax = (integer) (template_blas_lg10(&sfmax) / basl); 00556 i__1 = *ihi; 00557 for (i__ = *ilo; i__ <= i__1; ++i__) { 00558 i__2 = *n - *ilo + 1; 00559 irab = template_blas_idamax(&i__2, &a_ref(i__, *ilo), lda); 00560 rab = (d__1 = a_ref(i__, irab + *ilo - 1), absMACRO(d__1)); 00561 i__2 = *n - *ilo + 1; 00562 irab = template_blas_idamax(&i__2, &b_ref(i__, *ilo), lda); 00563 /* Computing MAX */ 00564 d__2 = rab, d__3 = (d__1 = b_ref(i__, irab + *ilo - 1), absMACRO(d__1)); 00565 rab = maxMACRO(d__2,d__3); 00566 d__1 = rab + sfmin; 00567 lrab = (integer) (template_blas_lg10(&d__1) / basl + 1.); 00568 ir = (integer) (lscale[i__] + template_lapack_d_sign(&c_b70, &lscale[i__])); 00569 /* Computing MIN */ 00570 i__2 = maxMACRO(ir,lsfmin), i__2 = minMACRO(i__2,lsfmax), i__3 = lsfmax - lrab; 00571 ir = minMACRO(i__2,i__3); 00572 lscale[i__] = template_lapack_pow_di(&c_b34, &ir); 00573 icab = template_blas_idamax(ihi, &a_ref(1, i__), &c__1); 00574 cab = (d__1 = a_ref(icab, i__), absMACRO(d__1)); 00575 icab = template_blas_idamax(ihi, &b_ref(1, i__), &c__1); 00576 /* Computing MAX */ 00577 d__2 = cab, d__3 = (d__1 = b_ref(icab, i__), absMACRO(d__1)); 00578 cab = maxMACRO(d__2,d__3); 00579 d__1 = cab + sfmin; 00580 lcab = (integer) (template_blas_lg10(&d__1) / basl + 1.); 00581 jc = (integer) (rscale[i__] + template_lapack_d_sign(&c_b70, &rscale[i__])); 00582 /* Computing MIN */ 00583 i__2 = maxMACRO(jc,lsfmin), i__2 = minMACRO(i__2,lsfmax), i__3 = lsfmax - lcab; 00584 jc = minMACRO(i__2,i__3); 00585 rscale[i__] = template_lapack_pow_di(&c_b34, &jc); 00586 /* L360: */ 00587 } 00588 00589 /* Row scaling of matrices A and B */ 00590 00591 i__1 = *ihi; 00592 for (i__ = *ilo; i__ <= i__1; ++i__) { 00593 i__2 = *n - *ilo + 1; 00594 template_blas_scal(&i__2, &lscale[i__], &a_ref(i__, *ilo), lda); 00595 i__2 = *n - *ilo + 1; 00596 template_blas_scal(&i__2, &lscale[i__], &b_ref(i__, *ilo), ldb); 00597 /* L370: */ 00598 } 00599 00600 /* Column scaling of matrices A and B */ 00601 00602 i__1 = *ihi; 00603 for (j = *ilo; j <= i__1; ++j) { 00604 template_blas_scal(ihi, &rscale[j], &a_ref(1, j), &c__1); 00605 template_blas_scal(ihi, &rscale[j], &b_ref(1, j), &c__1); 00606 /* L380: */ 00607 } 00608 00609 return 0; 00610 00611 /* End of DGGBAL */ 00612 00613 } /* dggbal_ */ 00614 00615 #undef b_ref 00616 #undef a_ref 00617 00618 00619 #endif