ergo
template_blas_gemv.h
Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_BLAS_GEMV_HEADER
00036 #define TEMPLATE_BLAS_GEMV_HEADER
00037 
00038 #include "template_blas_common.h"
00039 
00040 template<class Treal>
00041 int template_blas_gemv(const char *trans, const integer *m, const integer *n, const Treal *
00042         alpha, const Treal *a, const integer *lda, const Treal *x, const integer *incx, 
00043         const Treal *beta, Treal *y, const integer *incy)
00044 {
00045     /* System generated locals */
00046     integer a_dim1, a_offset, i__1, i__2;
00047     /* Local variables */
00048      integer info;
00049      Treal temp;
00050      integer lenx, leny, i__, j;
00051      integer ix, iy, jx, jy, kx, ky;
00052 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00053 /*  Purpose   
00054     =======   
00055     DGEMV  performs one of the matrix-vector operations   
00056        y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
00057     where alpha and beta are scalars, x and y are vectors and A is an   
00058     m by n matrix.   
00059     Parameters   
00060     ==========   
00061     TRANS  - CHARACTER*1.   
00062              On entry, TRANS specifies the operation to be performed as   
00063              follows:   
00064                 TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
00065                 TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
00066                 TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
00067              Unchanged on exit.   
00068     M      - INTEGER.   
00069              On entry, M specifies the number of rows of the matrix A.   
00070              M must be at least zero.   
00071              Unchanged on exit.   
00072     N      - INTEGER.   
00073              On entry, N specifies the number of columns of the matrix A.   
00074              N must be at least zero.   
00075              Unchanged on exit.   
00076     ALPHA  - DOUBLE PRECISION.   
00077              On entry, ALPHA specifies the scalar alpha.   
00078              Unchanged on exit.   
00079     A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).   
00080              Before entry, the leading m by n part of the array A must   
00081              contain the matrix of coefficients.   
00082              Unchanged on exit.   
00083     LDA    - INTEGER.   
00084              On entry, LDA specifies the first dimension of A as declared   
00085              in the calling (sub) program. LDA must be at least   
00086              max( 1, m ).   
00087              Unchanged on exit.   
00088     X      - DOUBLE PRECISION array of DIMENSION at least   
00089              ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
00090              and at least   
00091              ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
00092              Before entry, the incremented array X must contain the   
00093              vector x.   
00094              Unchanged on exit.   
00095     INCX   - INTEGER.   
00096              On entry, INCX specifies the increment for the elements of   
00097              X. INCX must not be zero.   
00098              Unchanged on exit.   
00099     BETA   - DOUBLE PRECISION.   
00100              On entry, BETA specifies the scalar beta. When BETA is   
00101              supplied as zero then Y need not be set on input.   
00102              Unchanged on exit.   
00103     Y      - DOUBLE PRECISION array of DIMENSION at least   
00104              ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
00105              and at least   
00106              ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
00107              Before entry with BETA non-zero, the incremented array Y   
00108              must contain the vector y. On exit, Y is overwritten by the   
00109              updated vector y.   
00110     INCY   - INTEGER.   
00111              On entry, INCY specifies the increment for the elements of   
00112              Y. INCY must not be zero.   
00113              Unchanged on exit.   
00114     Level 2 Blas routine.   
00115     -- Written on 22-October-1986.   
00116        Jack Dongarra, Argonne National Lab.   
00117        Jeremy Du Croz, Nag Central Office.   
00118        Sven Hammarling, Nag Central Office.   
00119        Richard Hanson, Sandia National Labs.   
00120        Test the input parameters.   
00121        Parameter adjustments */
00122     a_dim1 = *lda;
00123     a_offset = 1 + a_dim1 * 1;
00124     a -= a_offset;
00125     --x;
00126     --y;
00127     /* Function Body */
00128     info = 0;
00129     if (! template_blas_lsame(trans, "N") && ! template_blas_lsame(trans, "T") && ! template_blas_lsame(trans, "C")
00130             ) {
00131         info = 1;
00132     } else if (*m < 0) {
00133         info = 2;
00134     } else if (*n < 0) {
00135         info = 3;
00136     } else if (*lda < maxMACRO(1,*m)) {
00137         info = 6;
00138     } else if (*incx == 0) {
00139         info = 8;
00140     } else if (*incy == 0) {
00141         info = 11;
00142     }
00143     if (info != 0) {
00144         template_blas_erbla("GEMV  ", &info);
00145         return 0;
00146     }
00147 /*     Quick return if possible. */
00148     if (*m == 0 || *n == 0 || (*alpha == 0. && *beta == 1.) ) {
00149         return 0;
00150     }
00151 /*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set   
00152        up the start points in  X  and  Y. */
00153     if (template_blas_lsame(trans, "N")) {
00154         lenx = *n;
00155         leny = *m;
00156     } else {
00157         lenx = *m;
00158         leny = *n;
00159     }
00160     if (*incx > 0) {
00161         kx = 1;
00162     } else {
00163         kx = 1 - (lenx - 1) * *incx;
00164     }
00165     if (*incy > 0) {
00166         ky = 1;
00167     } else {
00168         ky = 1 - (leny - 1) * *incy;
00169     }
00170 /*     Start the operations. In this version the elements of A are   
00171        accessed sequentially with one pass through A.   
00172        First form  y := beta*y. */
00173     if (*beta != 1.) {
00174         if (*incy == 1) {
00175             if (*beta == 0.) {
00176                 i__1 = leny;
00177                 for (i__ = 1; i__ <= i__1; ++i__) {
00178                     y[i__] = 0.;
00179 /* L10: */
00180                 }
00181             } else {
00182                 i__1 = leny;
00183                 for (i__ = 1; i__ <= i__1; ++i__) {
00184                     y[i__] = *beta * y[i__];
00185 /* L20: */
00186                 }
00187             }
00188         } else {
00189             iy = ky;
00190             if (*beta == 0.) {
00191                 i__1 = leny;
00192                 for (i__ = 1; i__ <= i__1; ++i__) {
00193                     y[iy] = 0.;
00194                     iy += *incy;
00195 /* L30: */
00196                 }
00197             } else {
00198                 i__1 = leny;
00199                 for (i__ = 1; i__ <= i__1; ++i__) {
00200                     y[iy] = *beta * y[iy];
00201                     iy += *incy;
00202 /* L40: */
00203                 }
00204             }
00205         }
00206     }
00207     if (*alpha == 0.) {
00208         return 0;
00209     }
00210     if (template_blas_lsame(trans, "N")) {
00211 /*        Form  y := alpha*A*x + y. */
00212         jx = kx;
00213         if (*incy == 1) {
00214             i__1 = *n;
00215             for (j = 1; j <= i__1; ++j) {
00216                 if (x[jx] != 0.) {
00217                     temp = *alpha * x[jx];
00218                     i__2 = *m;
00219                     for (i__ = 1; i__ <= i__2; ++i__) {
00220                         y[i__] += temp * a_ref(i__, j);
00221 /* L50: */
00222                     }
00223                 }
00224                 jx += *incx;
00225 /* L60: */
00226             }
00227         } else {
00228             i__1 = *n;
00229             for (j = 1; j <= i__1; ++j) {
00230                 if (x[jx] != 0.) {
00231                     temp = *alpha * x[jx];
00232                     iy = ky;
00233                     i__2 = *m;
00234                     for (i__ = 1; i__ <= i__2; ++i__) {
00235                         y[iy] += temp * a_ref(i__, j);
00236                         iy += *incy;
00237 /* L70: */
00238                     }
00239                 }
00240                 jx += *incx;
00241 /* L80: */
00242             }
00243         }
00244     } else {
00245 /*        Form  y := alpha*A'*x + y. */
00246         jy = ky;
00247         if (*incx == 1) {
00248             i__1 = *n;
00249             for (j = 1; j <= i__1; ++j) {
00250                 temp = 0.;
00251                 i__2 = *m;
00252                 for (i__ = 1; i__ <= i__2; ++i__) {
00253                     temp += a_ref(i__, j) * x[i__];
00254 /* L90: */
00255                 }
00256                 y[jy] += *alpha * temp;
00257                 jy += *incy;
00258 /* L100: */
00259             }
00260         } else {
00261             i__1 = *n;
00262             for (j = 1; j <= i__1; ++j) {
00263                 temp = 0.;
00264                 ix = kx;
00265                 i__2 = *m;
00266                 for (i__ = 1; i__ <= i__2; ++i__) {
00267                     temp += a_ref(i__, j) * x[ix];
00268                     ix += *incx;
00269 /* L110: */
00270                 }
00271                 y[jy] += *alpha * temp;
00272                 jy += *incy;
00273 /* L120: */
00274             }
00275         }
00276     }
00277     return 0;
00278 /*     End of DGEMV . */
00279 } /* dgemv_ */
00280 #undef a_ref
00281 
00282 #endif