ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_ORGQR_HEADER 00036 #define TEMPLATE_LAPACK_ORGQR_HEADER 00037 00038 #include "template_lapack_common.h" 00039 00040 template<class Treal> 00041 int template_lapack_orgqr( 00042 const integer *m, 00043 const integer *n, 00044 const integer *k, 00045 Treal * a, 00046 const integer *lda, 00047 const Treal *tau, 00048 Treal *work, 00049 const integer *lwork, 00050 integer *info 00051 ) 00052 { 00053 /* -- LAPACK routine (version 3.0) -- 00054 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., 00055 Courant Institute, Argonne National Lab, and Rice University 00056 June 30, 1999 00057 00058 00059 Purpose 00060 ======= 00061 00062 DORGQR generates an M-by-N real matrix Q with orthonormal columns, 00063 which is defined as the first N columns of a product of K elementary 00064 reflectors of order M 00065 00066 Q = H(1) H(2) . . . H(k) 00067 00068 as returned by DGEQRF. 00069 00070 Arguments 00071 ========= 00072 00073 M (input) INTEGER 00074 The number of rows of the matrix Q. M >= 0. 00075 00076 N (input) INTEGER 00077 The number of columns of the matrix Q. M >= N >= 0. 00078 00079 K (input) INTEGER 00080 The number of elementary reflectors whose product defines the 00081 matrix Q. N >= K >= 0. 00082 00083 A (input/output) DOUBLE PRECISION array, dimension (LDA,N) 00084 On entry, the i-th column must contain the vector which 00085 defines the elementary reflector H(i), for i = 1,2,...,k, as 00086 returned by DGEQRF in the first k columns of its array 00087 argument A. 00088 On exit, the M-by-N matrix Q. 00089 00090 LDA (input) INTEGER 00091 The first dimension of the array A. LDA >= max(1,M). 00092 00093 TAU (input) DOUBLE PRECISION array, dimension (K) 00094 TAU(i) must contain the scalar factor of the elementary 00095 reflector H(i), as returned by DGEQRF. 00096 00097 WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) 00098 On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 00099 00100 LWORK (input) INTEGER 00101 The dimension of the array WORK. LWORK >= max(1,N). 00102 For optimum performance LWORK >= N*NB, where NB is the 00103 optimal blocksize. 00104 00105 If LWORK = -1, then a workspace query is assumed; the routine 00106 only calculates the optimal size of the WORK array, returns 00107 this value as the first entry of the WORK array, and no error 00108 message related to LWORK is issued by XERBLA. 00109 00110 INFO (output) INTEGER 00111 = 0: successful exit 00112 < 0: if INFO = -i, the i-th argument has an illegal value 00113 00114 ===================================================================== 00115 00116 00117 Test the input arguments 00118 00119 Parameter adjustments */ 00120 /* Table of constant values */ 00121 integer c__1 = 1; 00122 integer c_n1 = -1; 00123 integer c__3 = 3; 00124 integer c__2 = 2; 00125 00126 /* System generated locals */ 00127 integer a_dim1, a_offset, i__1, i__2, i__3; 00128 /* Local variables */ 00129 integer i__, j, l, nbmin, iinfo; 00130 integer ib, nb, ki, kk; 00131 integer nx; 00132 integer ldwork, lwkopt; 00133 logical lquery; 00134 integer iws; 00135 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 00136 00137 00138 a_dim1 = *lda; 00139 a_offset = 1 + a_dim1 * 1; 00140 a -= a_offset; 00141 --tau; 00142 --work; 00143 00144 /* Initialization added by Elias to get rid of compiler warnings. */ 00145 ki = 0; 00146 /* Function Body */ 00147 *info = 0; 00148 nb = template_lapack_ilaenv(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1); 00149 lwkopt = maxMACRO(1,*n) * nb; 00150 work[1] = (Treal) lwkopt; 00151 lquery = *lwork == -1; 00152 if (*m < 0) { 00153 *info = -1; 00154 } else if (*n < 0 || *n > *m) { 00155 *info = -2; 00156 } else if (*k < 0 || *k > *n) { 00157 *info = -3; 00158 } else if (*lda < maxMACRO(1,*m)) { 00159 *info = -5; 00160 } else if (*lwork < maxMACRO(1,*n) && ! lquery) { 00161 *info = -8; 00162 } 00163 if (*info != 0) { 00164 i__1 = -(*info); 00165 template_blas_erbla("ORGQR ", &i__1); 00166 return 0; 00167 } else if (lquery) { 00168 return 0; 00169 } 00170 00171 /* Quick return if possible */ 00172 00173 if (*n <= 0) { 00174 work[1] = 1.; 00175 return 0; 00176 } 00177 00178 nbmin = 2; 00179 nx = 0; 00180 iws = *n; 00181 if (nb > 1 && nb < *k) { 00182 00183 /* Determine when to cross over from blocked to unblocked code. 00184 00185 Computing MAX */ 00186 i__1 = 0, i__2 = template_lapack_ilaenv(&c__3, "DORGQR", " ", m, n, k, &c_n1, ( 00187 ftnlen)6, (ftnlen)1); 00188 nx = maxMACRO(i__1,i__2); 00189 if (nx < *k) { 00190 00191 /* Determine if workspace is large enough for blocked code. */ 00192 00193 ldwork = *n; 00194 iws = ldwork * nb; 00195 if (*lwork < iws) { 00196 00197 /* Not enough workspace to use optimal NB: reduce NB and 00198 determine the minimum value of NB. */ 00199 00200 nb = *lwork / ldwork; 00201 /* Computing MAX */ 00202 i__1 = 2, i__2 = template_lapack_ilaenv(&c__2, "DORGQR", " ", m, n, k, &c_n1, 00203 (ftnlen)6, (ftnlen)1); 00204 nbmin = maxMACRO(i__1,i__2); 00205 } 00206 } 00207 } 00208 00209 if (nb >= nbmin && nb < *k && nx < *k) { 00210 00211 /* Use blocked code after the last block. 00212 The first kk columns are handled by the block method. */ 00213 00214 ki = (*k - nx - 1) / nb * nb; 00215 /* Computing MIN */ 00216 i__1 = *k, i__2 = ki + nb; 00217 kk = minMACRO(i__1,i__2); 00218 00219 /* Set A(1:kk,kk+1:n) to zero. */ 00220 00221 i__1 = *n; 00222 for (j = kk + 1; j <= i__1; ++j) { 00223 i__2 = kk; 00224 for (i__ = 1; i__ <= i__2; ++i__) { 00225 a_ref(i__, j) = 0.; 00226 /* L10: */ 00227 } 00228 /* L20: */ 00229 } 00230 } else { 00231 kk = 0; 00232 } 00233 00234 /* Use unblocked code for the last or only block. */ 00235 00236 if (kk < *n) { 00237 i__1 = *m - kk; 00238 i__2 = *n - kk; 00239 i__3 = *k - kk; 00240 template_lapack_org2r(&i__1, &i__2, &i__3, &a_ref(kk + 1, kk + 1), lda, &tau[kk + 1] 00241 , &work[1], &iinfo); 00242 } 00243 00244 if (kk > 0) { 00245 00246 /* Use blocked code */ 00247 00248 i__1 = -nb; 00249 for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) { 00250 /* Computing MIN */ 00251 i__2 = nb, i__3 = *k - i__ + 1; 00252 ib = minMACRO(i__2,i__3); 00253 if (i__ + ib <= *n) { 00254 00255 /* Form the triangular factor of the block reflector 00256 H = H(i) H(i+1) . . . H(i+ib-1) */ 00257 00258 i__2 = *m - i__ + 1; 00259 template_lapack_larft("Forward", "Columnwise", &i__2, &ib, &a_ref(i__, i__), 00260 lda, &tau[i__], &work[1], &ldwork); 00261 00262 /* Apply H to A(i:m,i+ib:n) from the left */ 00263 00264 i__2 = *m - i__ + 1; 00265 i__3 = *n - i__ - ib + 1; 00266 template_lapack_larfb("Left", "No transpose", "Forward", "Columnwise", & 00267 i__2, &i__3, &ib, &a_ref(i__, i__), lda, &work[1], & 00268 ldwork, &a_ref(i__, i__ + ib), lda, &work[ib + 1], & 00269 ldwork); 00270 } 00271 00272 /* Apply H to rows i:m of current block */ 00273 00274 i__2 = *m - i__ + 1; 00275 template_lapack_org2r(&i__2, &ib, &ib, &a_ref(i__, i__), lda, &tau[i__], &work[ 00276 1], &iinfo); 00277 00278 /* Set rows 1:i-1 of current block to zero */ 00279 00280 i__2 = i__ + ib - 1; 00281 for (j = i__; j <= i__2; ++j) { 00282 i__3 = i__ - 1; 00283 for (l = 1; l <= i__3; ++l) { 00284 a_ref(l, j) = 0.; 00285 /* L30: */ 00286 } 00287 /* L40: */ 00288 } 00289 /* L50: */ 00290 } 00291 } 00292 00293 work[1] = (Treal) iws; 00294 return 0; 00295 00296 /* End of DORGQR */ 00297 00298 } /* dorgqr_ */ 00299 00300 #undef a_ref 00301 00302 00303 #endif