ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_BLAS_TRSM_HEADER 00036 #define TEMPLATE_BLAS_TRSM_HEADER 00037 00038 #include "template_blas_common.h" 00039 00040 template<class Treal> 00041 int template_blas_trsm(const char *side, const char *uplo, const char *transa, const char *diag, 00042 const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer * 00043 lda, Treal *b, const integer *ldb) 00044 { 00045 /* System generated locals */ 00046 integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; 00047 /* Local variables */ 00048 integer info; 00049 Treal temp; 00050 integer i__, j, k; 00051 logical lside; 00052 integer nrowa; 00053 logical upper; 00054 logical nounit; 00055 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 00056 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] 00057 /* Purpose 00058 ======= 00059 DTRSM solves one of the matrix equations 00060 op( A )*X = alpha*B, or X*op( A ) = alpha*B, 00061 where alpha is a scalar, X and B are m by n matrices, A is a unit, or 00062 non-unit, upper or lower triangular matrix and op( A ) is one of 00063 op( A ) = A or op( A ) = A'. 00064 The matrix X is overwritten on B. 00065 Parameters 00066 ========== 00067 SIDE - CHARACTER*1. 00068 On entry, SIDE specifies whether op( A ) appears on the left 00069 or right of X as follows: 00070 SIDE = 'L' or 'l' op( A )*X = alpha*B. 00071 SIDE = 'R' or 'r' X*op( A ) = alpha*B. 00072 Unchanged on exit. 00073 UPLO - CHARACTER*1. 00074 On entry, UPLO specifies whether the matrix A is an upper or 00075 lower triangular matrix as follows: 00076 UPLO = 'U' or 'u' A is an upper triangular matrix. 00077 UPLO = 'L' or 'l' A is a lower triangular matrix. 00078 Unchanged on exit. 00079 TRANSA - CHARACTER*1. 00080 On entry, TRANSA specifies the form of op( A ) to be used in 00081 the matrix multiplication as follows: 00082 TRANSA = 'N' or 'n' op( A ) = A. 00083 TRANSA = 'T' or 't' op( A ) = A'. 00084 TRANSA = 'C' or 'c' op( A ) = A'. 00085 Unchanged on exit. 00086 DIAG - CHARACTER*1. 00087 On entry, DIAG specifies whether or not A is unit triangular 00088 as follows: 00089 DIAG = 'U' or 'u' A is assumed to be unit triangular. 00090 DIAG = 'N' or 'n' A is not assumed to be unit 00091 triangular. 00092 Unchanged on exit. 00093 M - INTEGER. 00094 On entry, M specifies the number of rows of B. M must be at 00095 least zero. 00096 Unchanged on exit. 00097 N - INTEGER. 00098 On entry, N specifies the number of columns of B. N must be 00099 at least zero. 00100 Unchanged on exit. 00101 ALPHA - DOUBLE PRECISION. 00102 On entry, ALPHA specifies the scalar alpha. When alpha is 00103 zero then A is not referenced and B need not be set before 00104 entry. 00105 Unchanged on exit. 00106 A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m 00107 when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. 00108 Before entry with UPLO = 'U' or 'u', the leading k by k 00109 upper triangular part of the array A must contain the upper 00110 triangular matrix and the strictly lower triangular part of 00111 A is not referenced. 00112 Before entry with UPLO = 'L' or 'l', the leading k by k 00113 lower triangular part of the array A must contain the lower 00114 triangular matrix and the strictly upper triangular part of 00115 A is not referenced. 00116 Note that when DIAG = 'U' or 'u', the diagonal elements of 00117 A are not referenced either, but are assumed to be unity. 00118 Unchanged on exit. 00119 LDA - INTEGER. 00120 On entry, LDA specifies the first dimension of A as declared 00121 in the calling (sub) program. When SIDE = 'L' or 'l' then 00122 LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' 00123 then LDA must be at least max( 1, n ). 00124 Unchanged on exit. 00125 B - DOUBLE PRECISION array of DIMENSION ( LDB, n ). 00126 Before entry, the leading m by n part of the array B must 00127 contain the right-hand side matrix B, and on exit is 00128 overwritten by the solution matrix X. 00129 LDB - INTEGER. 00130 On entry, LDB specifies the first dimension of B as declared 00131 in the calling (sub) program. LDB must be at least 00132 max( 1, m ). 00133 Unchanged on exit. 00134 Level 3 Blas routine. 00135 -- Written on 8-February-1989. 00136 Jack Dongarra, Argonne National Laboratory. 00137 Iain Duff, AERE Harwell. 00138 Jeremy Du Croz, Numerical Algorithms Group Ltd. 00139 Sven Hammarling, Numerical Algorithms Group Ltd. 00140 Test the input parameters. 00141 Parameter adjustments */ 00142 a_dim1 = *lda; 00143 a_offset = 1 + a_dim1 * 1; 00144 a -= a_offset; 00145 b_dim1 = *ldb; 00146 b_offset = 1 + b_dim1 * 1; 00147 b -= b_offset; 00148 /* Function Body */ 00149 lside = template_blas_lsame(side, "L"); 00150 if (lside) { 00151 nrowa = *m; 00152 } else { 00153 nrowa = *n; 00154 } 00155 nounit = template_blas_lsame(diag, "N"); 00156 upper = template_blas_lsame(uplo, "U"); 00157 info = 0; 00158 if (! lside && ! template_blas_lsame(side, "R")) { 00159 info = 1; 00160 } else if (! upper && ! template_blas_lsame(uplo, "L")) { 00161 info = 2; 00162 } else if (! template_blas_lsame(transa, "N") && ! template_blas_lsame(transa, 00163 "T") && ! template_blas_lsame(transa, "C")) { 00164 info = 3; 00165 } else if (! template_blas_lsame(diag, "U") && ! template_blas_lsame(diag, 00166 "N")) { 00167 info = 4; 00168 } else if (*m < 0) { 00169 info = 5; 00170 } else if (*n < 0) { 00171 info = 6; 00172 } else if (*lda < maxMACRO(1,nrowa)) { 00173 info = 9; 00174 } else if (*ldb < maxMACRO(1,*m)) { 00175 info = 11; 00176 } 00177 if (info != 0) { 00178 template_blas_erbla("TRSM ", &info); 00179 return 0; 00180 } 00181 /* Quick return if possible. */ 00182 if (*n == 0) { 00183 return 0; 00184 } 00185 /* And when alpha.eq.zero. */ 00186 if (*alpha == 0.) { 00187 i__1 = *n; 00188 for (j = 1; j <= i__1; ++j) { 00189 i__2 = *m; 00190 for (i__ = 1; i__ <= i__2; ++i__) { 00191 b_ref(i__, j) = 0.; 00192 /* L10: */ 00193 } 00194 /* L20: */ 00195 } 00196 return 0; 00197 } 00198 /* Start the operations. */ 00199 if (lside) { 00200 if (template_blas_lsame(transa, "N")) { 00201 /* Form B := alpha*inv( A )*B. */ 00202 if (upper) { 00203 i__1 = *n; 00204 for (j = 1; j <= i__1; ++j) { 00205 if (*alpha != 1.) { 00206 i__2 = *m; 00207 for (i__ = 1; i__ <= i__2; ++i__) { 00208 b_ref(i__, j) = *alpha * b_ref(i__, j); 00209 /* L30: */ 00210 } 00211 } 00212 for (k = *m; k >= 1; --k) { 00213 if (b_ref(k, j) != 0.) { 00214 if (nounit) { 00215 b_ref(k, j) = b_ref(k, j) / a_ref(k, k); 00216 } 00217 i__2 = k - 1; 00218 for (i__ = 1; i__ <= i__2; ++i__) { 00219 b_ref(i__, j) = b_ref(i__, j) - b_ref(k, j) * 00220 a_ref(i__, k); 00221 /* L40: */ 00222 } 00223 } 00224 /* L50: */ 00225 } 00226 /* L60: */ 00227 } 00228 } else { 00229 i__1 = *n; 00230 for (j = 1; j <= i__1; ++j) { 00231 if (*alpha != 1.) { 00232 i__2 = *m; 00233 for (i__ = 1; i__ <= i__2; ++i__) { 00234 b_ref(i__, j) = *alpha * b_ref(i__, j); 00235 /* L70: */ 00236 } 00237 } 00238 i__2 = *m; 00239 for (k = 1; k <= i__2; ++k) { 00240 if (b_ref(k, j) != 0.) { 00241 if (nounit) { 00242 b_ref(k, j) = b_ref(k, j) / a_ref(k, k); 00243 } 00244 i__3 = *m; 00245 for (i__ = k + 1; i__ <= i__3; ++i__) { 00246 b_ref(i__, j) = b_ref(i__, j) - b_ref(k, j) * 00247 a_ref(i__, k); 00248 /* L80: */ 00249 } 00250 } 00251 /* L90: */ 00252 } 00253 /* L100: */ 00254 } 00255 } 00256 } else { 00257 /* Form B := alpha*inv( A' )*B. */ 00258 if (upper) { 00259 i__1 = *n; 00260 for (j = 1; j <= i__1; ++j) { 00261 i__2 = *m; 00262 for (i__ = 1; i__ <= i__2; ++i__) { 00263 temp = *alpha * b_ref(i__, j); 00264 i__3 = i__ - 1; 00265 for (k = 1; k <= i__3; ++k) { 00266 temp -= a_ref(k, i__) * b_ref(k, j); 00267 /* L110: */ 00268 } 00269 if (nounit) { 00270 temp /= a_ref(i__, i__); 00271 } 00272 b_ref(i__, j) = temp; 00273 /* L120: */ 00274 } 00275 /* L130: */ 00276 } 00277 } else { 00278 i__1 = *n; 00279 for (j = 1; j <= i__1; ++j) { 00280 for (i__ = *m; i__ >= 1; --i__) { 00281 temp = *alpha * b_ref(i__, j); 00282 i__2 = *m; 00283 for (k = i__ + 1; k <= i__2; ++k) { 00284 temp -= a_ref(k, i__) * b_ref(k, j); 00285 /* L140: */ 00286 } 00287 if (nounit) { 00288 temp /= a_ref(i__, i__); 00289 } 00290 b_ref(i__, j) = temp; 00291 /* L150: */ 00292 } 00293 /* L160: */ 00294 } 00295 } 00296 } 00297 } else { 00298 if (template_blas_lsame(transa, "N")) { 00299 /* Form B := alpha*B*inv( A ). */ 00300 if (upper) { 00301 i__1 = *n; 00302 for (j = 1; j <= i__1; ++j) { 00303 if (*alpha != 1.) { 00304 i__2 = *m; 00305 for (i__ = 1; i__ <= i__2; ++i__) { 00306 b_ref(i__, j) = *alpha * b_ref(i__, j); 00307 /* L170: */ 00308 } 00309 } 00310 i__2 = j - 1; 00311 for (k = 1; k <= i__2; ++k) { 00312 if (a_ref(k, j) != 0.) { 00313 i__3 = *m; 00314 for (i__ = 1; i__ <= i__3; ++i__) { 00315 b_ref(i__, j) = b_ref(i__, j) - a_ref(k, j) * 00316 b_ref(i__, k); 00317 /* L180: */ 00318 } 00319 } 00320 /* L190: */ 00321 } 00322 if (nounit) { 00323 temp = 1. / a_ref(j, j); 00324 i__2 = *m; 00325 for (i__ = 1; i__ <= i__2; ++i__) { 00326 b_ref(i__, j) = temp * b_ref(i__, j); 00327 /* L200: */ 00328 } 00329 } 00330 /* L210: */ 00331 } 00332 } else { 00333 for (j = *n; j >= 1; --j) { 00334 if (*alpha != 1.) { 00335 i__1 = *m; 00336 for (i__ = 1; i__ <= i__1; ++i__) { 00337 b_ref(i__, j) = *alpha * b_ref(i__, j); 00338 /* L220: */ 00339 } 00340 } 00341 i__1 = *n; 00342 for (k = j + 1; k <= i__1; ++k) { 00343 if (a_ref(k, j) != 0.) { 00344 i__2 = *m; 00345 for (i__ = 1; i__ <= i__2; ++i__) { 00346 b_ref(i__, j) = b_ref(i__, j) - a_ref(k, j) * 00347 b_ref(i__, k); 00348 /* L230: */ 00349 } 00350 } 00351 /* L240: */ 00352 } 00353 if (nounit) { 00354 temp = 1. / a_ref(j, j); 00355 i__1 = *m; 00356 for (i__ = 1; i__ <= i__1; ++i__) { 00357 b_ref(i__, j) = temp * b_ref(i__, j); 00358 /* L250: */ 00359 } 00360 } 00361 /* L260: */ 00362 } 00363 } 00364 } else { 00365 /* Form B := alpha*B*inv( A' ). */ 00366 if (upper) { 00367 for (k = *n; k >= 1; --k) { 00368 if (nounit) { 00369 temp = 1. / a_ref(k, k); 00370 i__1 = *m; 00371 for (i__ = 1; i__ <= i__1; ++i__) { 00372 b_ref(i__, k) = temp * b_ref(i__, k); 00373 /* L270: */ 00374 } 00375 } 00376 i__1 = k - 1; 00377 for (j = 1; j <= i__1; ++j) { 00378 if (a_ref(j, k) != 0.) { 00379 temp = a_ref(j, k); 00380 i__2 = *m; 00381 for (i__ = 1; i__ <= i__2; ++i__) { 00382 b_ref(i__, j) = b_ref(i__, j) - temp * b_ref( 00383 i__, k); 00384 /* L280: */ 00385 } 00386 } 00387 /* L290: */ 00388 } 00389 if (*alpha != 1.) { 00390 i__1 = *m; 00391 for (i__ = 1; i__ <= i__1; ++i__) { 00392 b_ref(i__, k) = *alpha * b_ref(i__, k); 00393 /* L300: */ 00394 } 00395 } 00396 /* L310: */ 00397 } 00398 } else { 00399 i__1 = *n; 00400 for (k = 1; k <= i__1; ++k) { 00401 if (nounit) { 00402 temp = 1. / a_ref(k, k); 00403 i__2 = *m; 00404 for (i__ = 1; i__ <= i__2; ++i__) { 00405 b_ref(i__, k) = temp * b_ref(i__, k); 00406 /* L320: */ 00407 } 00408 } 00409 i__2 = *n; 00410 for (j = k + 1; j <= i__2; ++j) { 00411 if (a_ref(j, k) != 0.) { 00412 temp = a_ref(j, k); 00413 i__3 = *m; 00414 for (i__ = 1; i__ <= i__3; ++i__) { 00415 b_ref(i__, j) = b_ref(i__, j) - temp * b_ref( 00416 i__, k); 00417 /* L330: */ 00418 } 00419 } 00420 /* L340: */ 00421 } 00422 if (*alpha != 1.) { 00423 i__2 = *m; 00424 for (i__ = 1; i__ <= i__2; ++i__) { 00425 b_ref(i__, k) = *alpha * b_ref(i__, k); 00426 /* L350: */ 00427 } 00428 } 00429 /* L360: */ 00430 } 00431 } 00432 } 00433 } 00434 return 0; 00435 /* End of DTRSM . */ 00436 } /* dtrsm_ */ 00437 #undef b_ref 00438 #undef a_ref 00439 00440 #endif