ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_BLAS_TPMV_HEADER 00036 #define TEMPLATE_BLAS_TPMV_HEADER 00037 00038 00039 template<class Treal> 00040 int template_blas_tpmv(const char *uplo, const char *trans, const char *diag, const integer *n, 00041 const Treal *ap, Treal *x, const integer *incx) 00042 { 00043 /* System generated locals */ 00044 integer i__1, i__2; 00045 /* Local variables */ 00046 integer info; 00047 Treal temp; 00048 integer i__, j, k; 00049 integer kk, ix, jx, kx; 00050 logical nounit; 00051 /* Purpose 00052 ======= 00053 DTPMV performs one of the matrix-vector operations 00054 x := A*x, or x := A'*x, 00055 where x is an n element vector and A is an n by n unit, or non-unit, 00056 upper or lower triangular matrix, supplied in packed form. 00057 Parameters 00058 ========== 00059 UPLO - CHARACTER*1. 00060 On entry, UPLO specifies whether the matrix is an upper or 00061 lower triangular matrix as follows: 00062 UPLO = 'U' or 'u' A is an upper triangular matrix. 00063 UPLO = 'L' or 'l' A is a lower triangular matrix. 00064 Unchanged on exit. 00065 TRANS - CHARACTER*1. 00066 On entry, TRANS specifies the operation to be performed as 00067 follows: 00068 TRANS = 'N' or 'n' x := A*x. 00069 TRANS = 'T' or 't' x := A'*x. 00070 TRANS = 'C' or 'c' x := A'*x. 00071 Unchanged on exit. 00072 DIAG - CHARACTER*1. 00073 On entry, DIAG specifies whether or not A is unit 00074 triangular as follows: 00075 DIAG = 'U' or 'u' A is assumed to be unit triangular. 00076 DIAG = 'N' or 'n' A is not assumed to be unit 00077 triangular. 00078 Unchanged on exit. 00079 N - INTEGER. 00080 On entry, N specifies the order of the matrix A. 00081 N must be at least zero. 00082 Unchanged on exit. 00083 AP - DOUBLE PRECISION array of DIMENSION at least 00084 ( ( n*( n + 1 ) )/2 ). 00085 Before entry with UPLO = 'U' or 'u', the array AP must 00086 contain the upper triangular matrix packed sequentially, 00087 column by column, so that AP( 1 ) contains a( 1, 1 ), 00088 AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) 00089 respectively, and so on. 00090 Before entry with UPLO = 'L' or 'l', the array AP must 00091 contain the lower triangular matrix packed sequentially, 00092 column by column, so that AP( 1 ) contains a( 1, 1 ), 00093 AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) 00094 respectively, and so on. 00095 Note that when DIAG = 'U' or 'u', the diagonal elements of 00096 A are not referenced, but are assumed to be unity. 00097 Unchanged on exit. 00098 X - DOUBLE PRECISION array of dimension at least 00099 ( 1 + ( n - 1 )*abs( INCX ) ). 00100 Before entry, the incremented array X must contain the n 00101 element vector x. On exit, X is overwritten with the 00102 tranformed vector x. 00103 INCX - INTEGER. 00104 On entry, INCX specifies the increment for the elements of 00105 X. INCX must not be zero. 00106 Unchanged on exit. 00107 Level 2 Blas routine. 00108 -- Written on 22-October-1986. 00109 Jack Dongarra, Argonne National Lab. 00110 Jeremy Du Croz, Nag Central Office. 00111 Sven Hammarling, Nag Central Office. 00112 Richard Hanson, Sandia National Labs. 00113 Test the input parameters. 00114 Parameter adjustments */ 00115 --x; 00116 --ap; 00117 /* Initialization added by Elias to get rid of compiler warnings. */ 00118 kx = 0; 00119 /* Function Body */ 00120 info = 0; 00121 if (! template_blas_lsame(uplo, "U") && ! template_blas_lsame(uplo, "L")) { 00122 info = 1; 00123 } else if (! template_blas_lsame(trans, "N") && ! template_blas_lsame(trans, 00124 "T") && ! template_blas_lsame(trans, "C")) { 00125 info = 2; 00126 } else if (! template_blas_lsame(diag, "U") && ! template_blas_lsame(diag, 00127 "N")) { 00128 info = 3; 00129 } else if (*n < 0) { 00130 info = 4; 00131 } else if (*incx == 0) { 00132 info = 7; 00133 } 00134 if (info != 0) { 00135 template_blas_erbla("TPMV ", &info); 00136 return 0; 00137 } 00138 /* Quick return if possible. */ 00139 if (*n == 0) { 00140 return 0; 00141 } 00142 nounit = template_blas_lsame(diag, "N"); 00143 /* Set up the start point in X if the increment is not unity. This 00144 will be ( N - 1 )*INCX too small for descending loops. */ 00145 if (*incx <= 0) { 00146 kx = 1 - (*n - 1) * *incx; 00147 } else if (*incx != 1) { 00148 kx = 1; 00149 } 00150 /* Start the operations. In this version the elements of AP are 00151 accessed sequentially with one pass through AP. */ 00152 if (template_blas_lsame(trans, "N")) { 00153 /* Form x:= A*x. */ 00154 if (template_blas_lsame(uplo, "U")) { 00155 kk = 1; 00156 if (*incx == 1) { 00157 i__1 = *n; 00158 for (j = 1; j <= i__1; ++j) { 00159 if (x[j] != 0.) { 00160 temp = x[j]; 00161 k = kk; 00162 i__2 = j - 1; 00163 for (i__ = 1; i__ <= i__2; ++i__) { 00164 x[i__] += temp * ap[k]; 00165 ++k; 00166 /* L10: */ 00167 } 00168 if (nounit) { 00169 x[j] *= ap[kk + j - 1]; 00170 } 00171 } 00172 kk += j; 00173 /* L20: */ 00174 } 00175 } else { 00176 jx = kx; 00177 i__1 = *n; 00178 for (j = 1; j <= i__1; ++j) { 00179 if (x[jx] != 0.) { 00180 temp = x[jx]; 00181 ix = kx; 00182 i__2 = kk + j - 2; 00183 for (k = kk; k <= i__2; ++k) { 00184 x[ix] += temp * ap[k]; 00185 ix += *incx; 00186 /* L30: */ 00187 } 00188 if (nounit) { 00189 x[jx] *= ap[kk + j - 1]; 00190 } 00191 } 00192 jx += *incx; 00193 kk += j; 00194 /* L40: */ 00195 } 00196 } 00197 } else { 00198 kk = *n * (*n + 1) / 2; 00199 if (*incx == 1) { 00200 for (j = *n; j >= 1; --j) { 00201 if (x[j] != 0.) { 00202 temp = x[j]; 00203 k = kk; 00204 i__1 = j + 1; 00205 for (i__ = *n; i__ >= i__1; --i__) { 00206 x[i__] += temp * ap[k]; 00207 --k; 00208 /* L50: */ 00209 } 00210 if (nounit) { 00211 x[j] *= ap[kk - *n + j]; 00212 } 00213 } 00214 kk -= *n - j + 1; 00215 /* L60: */ 00216 } 00217 } else { 00218 kx += (*n - 1) * *incx; 00219 jx = kx; 00220 for (j = *n; j >= 1; --j) { 00221 if (x[jx] != 0.) { 00222 temp = x[jx]; 00223 ix = kx; 00224 i__1 = kk - (*n - (j + 1)); 00225 for (k = kk; k >= i__1; --k) { 00226 x[ix] += temp * ap[k]; 00227 ix -= *incx; 00228 /* L70: */ 00229 } 00230 if (nounit) { 00231 x[jx] *= ap[kk - *n + j]; 00232 } 00233 } 00234 jx -= *incx; 00235 kk -= *n - j + 1; 00236 /* L80: */ 00237 } 00238 } 00239 } 00240 } else { 00241 /* Form x := A'*x. */ 00242 if (template_blas_lsame(uplo, "U")) { 00243 kk = *n * (*n + 1) / 2; 00244 if (*incx == 1) { 00245 for (j = *n; j >= 1; --j) { 00246 temp = x[j]; 00247 if (nounit) { 00248 temp *= ap[kk]; 00249 } 00250 k = kk - 1; 00251 for (i__ = j - 1; i__ >= 1; --i__) { 00252 temp += ap[k] * x[i__]; 00253 --k; 00254 /* L90: */ 00255 } 00256 x[j] = temp; 00257 kk -= j; 00258 /* L100: */ 00259 } 00260 } else { 00261 jx = kx + (*n - 1) * *incx; 00262 for (j = *n; j >= 1; --j) { 00263 temp = x[jx]; 00264 ix = jx; 00265 if (nounit) { 00266 temp *= ap[kk]; 00267 } 00268 i__1 = kk - j + 1; 00269 for (k = kk - 1; k >= i__1; --k) { 00270 ix -= *incx; 00271 temp += ap[k] * x[ix]; 00272 /* L110: */ 00273 } 00274 x[jx] = temp; 00275 jx -= *incx; 00276 kk -= j; 00277 /* L120: */ 00278 } 00279 } 00280 } else { 00281 kk = 1; 00282 if (*incx == 1) { 00283 i__1 = *n; 00284 for (j = 1; j <= i__1; ++j) { 00285 temp = x[j]; 00286 if (nounit) { 00287 temp *= ap[kk]; 00288 } 00289 k = kk + 1; 00290 i__2 = *n; 00291 for (i__ = j + 1; i__ <= i__2; ++i__) { 00292 temp += ap[k] * x[i__]; 00293 ++k; 00294 /* L130: */ 00295 } 00296 x[j] = temp; 00297 kk += *n - j + 1; 00298 /* L140: */ 00299 } 00300 } else { 00301 jx = kx; 00302 i__1 = *n; 00303 for (j = 1; j <= i__1; ++j) { 00304 temp = x[jx]; 00305 ix = jx; 00306 if (nounit) { 00307 temp *= ap[kk]; 00308 } 00309 i__2 = kk + *n - j; 00310 for (k = kk + 1; k <= i__2; ++k) { 00311 ix += *incx; 00312 temp += ap[k] * x[ix]; 00313 /* L150: */ 00314 } 00315 x[jx] = temp; 00316 jx += *incx; 00317 kk += *n - j + 1; 00318 /* L160: */ 00319 } 00320 } 00321 } 00322 } 00323 return 0; 00324 /* End of DTPMV . */ 00325 } /* dtpmv_ */ 00326 00327 #endif