ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_SPGST_HEADER 00036 #define TEMPLATE_LAPACK_SPGST_HEADER 00037 00038 #include "template_lapack_common.h" 00039 00040 template<class Treal> 00041 int template_lapack_spgst(const integer *itype, const char *uplo, const integer *n, 00042 Treal *ap, const Treal *bp, integer *info) 00043 { 00044 /* -- LAPACK routine (version 3.0) -- 00045 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 00046 Courant Institute, Argonne National Lab, and Rice University 00047 March 31, 1993 00048 00049 00050 Purpose 00051 ======= 00052 00053 DSPGST reduces a real symmetric-definite generalized eigenproblem 00054 to standard form, using packed storage. 00055 00056 If ITYPE = 1, the problem is A*x = lambda*B*x, 00057 and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) 00058 00059 If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or 00060 B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. 00061 00062 B must have been previously factorized as U**T*U or L*L**T by DPPTRF. 00063 00064 Arguments 00065 ========= 00066 00067 ITYPE (input) INTEGER 00068 = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); 00069 = 2 or 3: compute U*A*U**T or L**T*A*L. 00070 00071 UPLO (input) CHARACTER 00072 = 'U': Upper triangle of A is stored and B is factored as 00073 U**T*U; 00074 = 'L': Lower triangle of A is stored and B is factored as 00075 L*L**T. 00076 00077 N (input) INTEGER 00078 The order of the matrices A and B. N >= 0. 00079 00080 AP (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2) 00081 On entry, the upper or lower triangle of the symmetric matrix 00082 A, packed columnwise in a linear array. The j-th column of A 00083 is stored in the array AP as follows: 00084 if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; 00085 if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. 00086 00087 On exit, if INFO = 0, the transformed matrix, stored in the 00088 same format as A. 00089 00090 BP (input) DOUBLE PRECISION array, dimension (N*(N+1)/2) 00091 The triangular factor from the Cholesky factorization of B, 00092 stored in the same format as A, as returned by DPPTRF. 00093 00094 INFO (output) INTEGER 00095 = 0: successful exit 00096 < 0: if INFO = -i, the i-th argument had an illegal value 00097 00098 ===================================================================== 00099 00100 00101 Test the input parameters. 00102 00103 Parameter adjustments */ 00104 /* Table of constant values */ 00105 integer c__1 = 1; 00106 Treal c_b9 = -1.; 00107 Treal c_b11 = 1.; 00108 00109 /* System generated locals */ 00110 integer i__1, i__2; 00111 Treal d__1; 00112 /* Local variables */ 00113 integer j, k; 00114 logical upper; 00115 integer j1, k1; 00116 integer jj, kk; 00117 Treal ct; 00118 Treal ajj; 00119 integer j1j1; 00120 Treal akk; 00121 integer k1k1; 00122 Treal bjj, bkk; 00123 00124 00125 --bp; 00126 --ap; 00127 00128 /* Function Body */ 00129 *info = 0; 00130 upper = template_blas_lsame(uplo, "U"); 00131 if (*itype < 1 || *itype > 3) { 00132 *info = -1; 00133 } else if (! upper && ! template_blas_lsame(uplo, "L")) { 00134 *info = -2; 00135 } else if (*n < 0) { 00136 *info = -3; 00137 } 00138 if (*info != 0) { 00139 i__1 = -(*info); 00140 template_blas_erbla("SPGST ", &i__1); 00141 return 0; 00142 } 00143 00144 if (*itype == 1) { 00145 if (upper) { 00146 00147 /* Compute inv(U')*A*inv(U) 00148 00149 J1 and JJ are the indices of A(1,j) and A(j,j) */ 00150 00151 jj = 0; 00152 i__1 = *n; 00153 for (j = 1; j <= i__1; ++j) { 00154 j1 = jj + 1; 00155 jj += j; 00156 00157 /* Compute the j-th column of the upper triangle of A */ 00158 00159 bjj = bp[jj]; 00160 template_blas_tpsv(uplo, "Transpose", "Nonunit", &j, &bp[1], &ap[j1], & 00161 c__1); 00162 i__2 = j - 1; 00163 template_blas_spmv(uplo, &i__2, &c_b9, &ap[1], &bp[j1], &c__1, &c_b11, & 00164 ap[j1], &c__1); 00165 i__2 = j - 1; 00166 d__1 = 1. / bjj; 00167 template_blas_scal(&i__2, &d__1, &ap[j1], &c__1); 00168 i__2 = j - 1; 00169 ap[jj] = (ap[jj] - template_blas_dot(&i__2, &ap[j1], &c__1, &bp[j1], & 00170 c__1)) / bjj; 00171 /* L10: */ 00172 } 00173 } else { 00174 00175 /* Compute inv(L)*A*inv(L') 00176 00177 KK and K1K1 are the indices of A(k,k) and A(k+1,k+1) */ 00178 00179 kk = 1; 00180 i__1 = *n; 00181 for (k = 1; k <= i__1; ++k) { 00182 k1k1 = kk + *n - k + 1; 00183 00184 /* Update the lower triangle of A(k:n,k:n) */ 00185 00186 akk = ap[kk]; 00187 bkk = bp[kk]; 00188 /* Computing 2nd power */ 00189 d__1 = bkk; 00190 akk /= d__1 * d__1; 00191 ap[kk] = akk; 00192 if (k < *n) { 00193 i__2 = *n - k; 00194 d__1 = 1. / bkk; 00195 template_blas_scal(&i__2, &d__1, &ap[kk + 1], &c__1); 00196 ct = akk * -.5; 00197 i__2 = *n - k; 00198 template_blas_axpy(&i__2, &ct, &bp[kk + 1], &c__1, &ap[kk + 1], &c__1) 00199 ; 00200 i__2 = *n - k; 00201 template_blas_spr2(uplo, &i__2, &c_b9, &ap[kk + 1], &c__1, &bp[kk + 1] 00202 , &c__1, &ap[k1k1]); 00203 i__2 = *n - k; 00204 template_blas_axpy(&i__2, &ct, &bp[kk + 1], &c__1, &ap[kk + 1], &c__1) 00205 ; 00206 i__2 = *n - k; 00207 template_blas_tpsv(uplo, "No transpose", "Non-unit", &i__2, &bp[k1k1], 00208 &ap[kk + 1], &c__1); 00209 } 00210 kk = k1k1; 00211 /* L20: */ 00212 } 00213 } 00214 } else { 00215 if (upper) { 00216 00217 /* Compute U*A*U' 00218 00219 K1 and KK are the indices of A(1,k) and A(k,k) */ 00220 00221 kk = 0; 00222 i__1 = *n; 00223 for (k = 1; k <= i__1; ++k) { 00224 k1 = kk + 1; 00225 kk += k; 00226 00227 /* Update the upper triangle of A(1:k,1:k) */ 00228 00229 akk = ap[kk]; 00230 bkk = bp[kk]; 00231 i__2 = k - 1; 00232 template_blas_tpmv(uplo, "No transpose", "Non-unit", &i__2, &bp[1], &ap[ 00233 k1], &c__1); 00234 ct = akk * .5; 00235 i__2 = k - 1; 00236 template_blas_axpy(&i__2, &ct, &bp[k1], &c__1, &ap[k1], &c__1); 00237 i__2 = k - 1; 00238 template_blas_spr2(uplo, &i__2, &c_b11, &ap[k1], &c__1, &bp[k1], &c__1, & 00239 ap[1]); 00240 i__2 = k - 1; 00241 template_blas_axpy(&i__2, &ct, &bp[k1], &c__1, &ap[k1], &c__1); 00242 i__2 = k - 1; 00243 template_blas_scal(&i__2, &bkk, &ap[k1], &c__1); 00244 /* Computing 2nd power */ 00245 d__1 = bkk; 00246 ap[kk] = akk * (d__1 * d__1); 00247 /* L30: */ 00248 } 00249 } else { 00250 00251 /* Compute L'*A*L 00252 00253 JJ and J1J1 are the indices of A(j,j) and A(j+1,j+1) */ 00254 00255 jj = 1; 00256 i__1 = *n; 00257 for (j = 1; j <= i__1; ++j) { 00258 j1j1 = jj + *n - j + 1; 00259 00260 /* Compute the j-th column of the lower triangle of A */ 00261 00262 ajj = ap[jj]; 00263 bjj = bp[jj]; 00264 i__2 = *n - j; 00265 ap[jj] = ajj * bjj + template_blas_dot(&i__2, &ap[jj + 1], &c__1, &bp[jj 00266 + 1], &c__1); 00267 i__2 = *n - j; 00268 template_blas_scal(&i__2, &bjj, &ap[jj + 1], &c__1); 00269 i__2 = *n - j; 00270 template_blas_spmv(uplo, &i__2, &c_b11, &ap[j1j1], &bp[jj + 1], &c__1, & 00271 c_b11, &ap[jj + 1], &c__1); 00272 i__2 = *n - j + 1; 00273 template_blas_tpmv(uplo, "Transpose", "Non-unit", &i__2, &bp[jj], &ap[jj], 00274 &c__1); 00275 jj = j1j1; 00276 /* L40: */ 00277 } 00278 } 00279 } 00280 return 0; 00281 00282 /* End of DSPGST */ 00283 00284 } /* dspgst_ */ 00285 00286 #endif