ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_BLAS_GEMV_HEADER 00036 #define TEMPLATE_BLAS_GEMV_HEADER 00037 00038 #include "template_blas_common.h" 00039 00040 template<class Treal> 00041 int template_blas_gemv(const char *trans, const integer *m, const integer *n, const Treal * 00042 alpha, const Treal *a, const integer *lda, const Treal *x, const integer *incx, 00043 const Treal *beta, Treal *y, const integer *incy) 00044 { 00045 /* System generated locals */ 00046 integer a_dim1, a_offset, i__1, i__2; 00047 /* Local variables */ 00048 integer info; 00049 Treal temp; 00050 integer lenx, leny, i__, j; 00051 integer ix, iy, jx, jy, kx, ky; 00052 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] 00053 /* Purpose 00054 ======= 00055 DGEMV performs one of the matrix-vector operations 00056 y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, 00057 where alpha and beta are scalars, x and y are vectors and A is an 00058 m by n matrix. 00059 Parameters 00060 ========== 00061 TRANS - CHARACTER*1. 00062 On entry, TRANS specifies the operation to be performed as 00063 follows: 00064 TRANS = 'N' or 'n' y := alpha*A*x + beta*y. 00065 TRANS = 'T' or 't' y := alpha*A'*x + beta*y. 00066 TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. 00067 Unchanged on exit. 00068 M - INTEGER. 00069 On entry, M specifies the number of rows of the matrix A. 00070 M must be at least zero. 00071 Unchanged on exit. 00072 N - INTEGER. 00073 On entry, N specifies the number of columns of the matrix A. 00074 N must be at least zero. 00075 Unchanged on exit. 00076 ALPHA - DOUBLE PRECISION. 00077 On entry, ALPHA specifies the scalar alpha. 00078 Unchanged on exit. 00079 A - DOUBLE PRECISION array of DIMENSION ( LDA, n ). 00080 Before entry, the leading m by n part of the array A must 00081 contain the matrix of coefficients. 00082 Unchanged on exit. 00083 LDA - INTEGER. 00084 On entry, LDA specifies the first dimension of A as declared 00085 in the calling (sub) program. LDA must be at least 00086 max( 1, m ). 00087 Unchanged on exit. 00088 X - DOUBLE PRECISION array of DIMENSION at least 00089 ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' 00090 and at least 00091 ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. 00092 Before entry, the incremented array X must contain the 00093 vector x. 00094 Unchanged on exit. 00095 INCX - INTEGER. 00096 On entry, INCX specifies the increment for the elements of 00097 X. INCX must not be zero. 00098 Unchanged on exit. 00099 BETA - DOUBLE PRECISION. 00100 On entry, BETA specifies the scalar beta. When BETA is 00101 supplied as zero then Y need not be set on input. 00102 Unchanged on exit. 00103 Y - DOUBLE PRECISION array of DIMENSION at least 00104 ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' 00105 and at least 00106 ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. 00107 Before entry with BETA non-zero, the incremented array Y 00108 must contain the vector y. On exit, Y is overwritten by the 00109 updated vector y. 00110 INCY - INTEGER. 00111 On entry, INCY specifies the increment for the elements of 00112 Y. INCY must not be zero. 00113 Unchanged on exit. 00114 Level 2 Blas routine. 00115 -- Written on 22-October-1986. 00116 Jack Dongarra, Argonne National Lab. 00117 Jeremy Du Croz, Nag Central Office. 00118 Sven Hammarling, Nag Central Office. 00119 Richard Hanson, Sandia National Labs. 00120 Test the input parameters. 00121 Parameter adjustments */ 00122 a_dim1 = *lda; 00123 a_offset = 1 + a_dim1 * 1; 00124 a -= a_offset; 00125 --x; 00126 --y; 00127 /* Function Body */ 00128 info = 0; 00129 if (! template_blas_lsame(trans, "N") && ! template_blas_lsame(trans, "T") && ! template_blas_lsame(trans, "C") 00130 ) { 00131 info = 1; 00132 } else if (*m < 0) { 00133 info = 2; 00134 } else if (*n < 0) { 00135 info = 3; 00136 } else if (*lda < maxMACRO(1,*m)) { 00137 info = 6; 00138 } else if (*incx == 0) { 00139 info = 8; 00140 } else if (*incy == 0) { 00141 info = 11; 00142 } 00143 if (info != 0) { 00144 template_blas_erbla("GEMV ", &info); 00145 return 0; 00146 } 00147 /* Quick return if possible. */ 00148 if (*m == 0 || *n == 0 || (*alpha == 0. && *beta == 1.) ) { 00149 return 0; 00150 } 00151 /* Set LENX and LENY, the lengths of the vectors x and y, and set 00152 up the start points in X and Y. */ 00153 if (template_blas_lsame(trans, "N")) { 00154 lenx = *n; 00155 leny = *m; 00156 } else { 00157 lenx = *m; 00158 leny = *n; 00159 } 00160 if (*incx > 0) { 00161 kx = 1; 00162 } else { 00163 kx = 1 - (lenx - 1) * *incx; 00164 } 00165 if (*incy > 0) { 00166 ky = 1; 00167 } else { 00168 ky = 1 - (leny - 1) * *incy; 00169 } 00170 /* Start the operations. In this version the elements of A are 00171 accessed sequentially with one pass through A. 00172 First form y := beta*y. */ 00173 if (*beta != 1.) { 00174 if (*incy == 1) { 00175 if (*beta == 0.) { 00176 i__1 = leny; 00177 for (i__ = 1; i__ <= i__1; ++i__) { 00178 y[i__] = 0.; 00179 /* L10: */ 00180 } 00181 } else { 00182 i__1 = leny; 00183 for (i__ = 1; i__ <= i__1; ++i__) { 00184 y[i__] = *beta * y[i__]; 00185 /* L20: */ 00186 } 00187 } 00188 } else { 00189 iy = ky; 00190 if (*beta == 0.) { 00191 i__1 = leny; 00192 for (i__ = 1; i__ <= i__1; ++i__) { 00193 y[iy] = 0.; 00194 iy += *incy; 00195 /* L30: */ 00196 } 00197 } else { 00198 i__1 = leny; 00199 for (i__ = 1; i__ <= i__1; ++i__) { 00200 y[iy] = *beta * y[iy]; 00201 iy += *incy; 00202 /* L40: */ 00203 } 00204 } 00205 } 00206 } 00207 if (*alpha == 0.) { 00208 return 0; 00209 } 00210 if (template_blas_lsame(trans, "N")) { 00211 /* Form y := alpha*A*x + y. */ 00212 jx = kx; 00213 if (*incy == 1) { 00214 i__1 = *n; 00215 for (j = 1; j <= i__1; ++j) { 00216 if (x[jx] != 0.) { 00217 temp = *alpha * x[jx]; 00218 i__2 = *m; 00219 for (i__ = 1; i__ <= i__2; ++i__) { 00220 y[i__] += temp * a_ref(i__, j); 00221 /* L50: */ 00222 } 00223 } 00224 jx += *incx; 00225 /* L60: */ 00226 } 00227 } else { 00228 i__1 = *n; 00229 for (j = 1; j <= i__1; ++j) { 00230 if (x[jx] != 0.) { 00231 temp = *alpha * x[jx]; 00232 iy = ky; 00233 i__2 = *m; 00234 for (i__ = 1; i__ <= i__2; ++i__) { 00235 y[iy] += temp * a_ref(i__, j); 00236 iy += *incy; 00237 /* L70: */ 00238 } 00239 } 00240 jx += *incx; 00241 /* L80: */ 00242 } 00243 } 00244 } else { 00245 /* Form y := alpha*A'*x + y. */ 00246 jy = ky; 00247 if (*incx == 1) { 00248 i__1 = *n; 00249 for (j = 1; j <= i__1; ++j) { 00250 temp = 0.; 00251 i__2 = *m; 00252 for (i__ = 1; i__ <= i__2; ++i__) { 00253 temp += a_ref(i__, j) * x[i__]; 00254 /* L90: */ 00255 } 00256 y[jy] += *alpha * temp; 00257 jy += *incy; 00258 /* L100: */ 00259 } 00260 } else { 00261 i__1 = *n; 00262 for (j = 1; j <= i__1; ++j) { 00263 temp = 0.; 00264 ix = kx; 00265 i__2 = *m; 00266 for (i__ = 1; i__ <= i__2; ++i__) { 00267 temp += a_ref(i__, j) * x[ix]; 00268 ix += *incx; 00269 /* L110: */ 00270 } 00271 y[jy] += *alpha * temp; 00272 jy += *incy; 00273 /* L120: */ 00274 } 00275 } 00276 } 00277 return 0; 00278 /* End of DGEMV . */ 00279 } /* dgemv_ */ 00280 #undef a_ref 00281 00282 #endif