ergo
template_blas_trsv.h
Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_BLAS_TRSV_HEADER
00036 #define TEMPLATE_BLAS_TRSV_HEADER
00037 
00038 
00039 template<class Treal>
00040 int template_blas_trsv(const char *uplo, const char *trans, const char *diag, const integer *n, 
00041         const Treal *a, const integer *lda, Treal *x, const integer *incx)
00042 {
00043     /* System generated locals */
00044     integer a_dim1, a_offset, i__1, i__2;
00045     /* Local variables */
00046      integer info;
00047      Treal temp;
00048      integer i__, j;
00049      integer ix, jx, kx;
00050      logical nounit;
00051 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00052 /*  Purpose   
00053     =======   
00054     DTRSV  solves one of the systems of equations   
00055        A*x = b,   or   A'*x = b,   
00056     where b and x are n element vectors and A is an n by n unit, or   
00057     non-unit, upper or lower triangular matrix.   
00058     No test for singularity or near-singularity is included in this   
00059     routine. Such tests must be performed before calling this routine.   
00060     Parameters   
00061     ==========   
00062     UPLO   - CHARACTER*1.   
00063              On entry, UPLO specifies whether the matrix is an upper or   
00064              lower triangular matrix as follows:   
00065                 UPLO = 'U' or 'u'   A is an upper triangular matrix.   
00066                 UPLO = 'L' or 'l'   A is a lower triangular matrix.   
00067              Unchanged on exit.   
00068     TRANS  - CHARACTER*1.   
00069              On entry, TRANS specifies the equations to be solved as   
00070              follows:   
00071                 TRANS = 'N' or 'n'   A*x = b.   
00072                 TRANS = 'T' or 't'   A'*x = b.   
00073                 TRANS = 'C' or 'c'   A'*x = b.   
00074              Unchanged on exit.   
00075     DIAG   - CHARACTER*1.   
00076              On entry, DIAG specifies whether or not A is unit   
00077              triangular as follows:   
00078                 DIAG = 'U' or 'u'   A is assumed to be unit triangular.   
00079                 DIAG = 'N' or 'n'   A is not assumed to be unit   
00080                                     triangular.   
00081              Unchanged on exit.   
00082     N      - INTEGER.   
00083              On entry, N specifies the order of the matrix A.   
00084              N must be at least zero.   
00085              Unchanged on exit.   
00086     A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).   
00087              Before entry with  UPLO = 'U' or 'u', the leading n by n   
00088              upper triangular part of the array A must contain the upper   
00089              triangular matrix and the strictly lower triangular part of   
00090              A is not referenced.   
00091              Before entry with UPLO = 'L' or 'l', the leading n by n   
00092              lower triangular part of the array A must contain the lower   
00093              triangular matrix and the strictly upper triangular part of   
00094              A is not referenced.   
00095              Note that when  DIAG = 'U' or 'u', the diagonal elements of   
00096              A are not referenced either, but are assumed to be unity.   
00097              Unchanged on exit.   
00098     LDA    - INTEGER.   
00099              On entry, LDA specifies the first dimension of A as declared   
00100              in the calling (sub) program. LDA must be at least   
00101              max( 1, n ).   
00102              Unchanged on exit.   
00103     X      - DOUBLE PRECISION array of dimension at least   
00104              ( 1 + ( n - 1 )*abs( INCX ) ).   
00105              Before entry, the incremented array X must contain the n   
00106              element right-hand side vector b. On exit, X is overwritten   
00107              with the solution vector x.   
00108     INCX   - INTEGER.   
00109              On entry, INCX specifies the increment for the elements of   
00110              X. INCX must not be zero.   
00111              Unchanged on exit.   
00112     Level 2 Blas routine.   
00113     -- Written on 22-October-1986.   
00114        Jack Dongarra, Argonne National Lab.   
00115        Jeremy Du Croz, Nag Central Office.   
00116        Sven Hammarling, Nag Central Office.   
00117        Richard Hanson, Sandia National Labs.   
00118        Test the input parameters.   
00119        Parameter adjustments */
00120     a_dim1 = *lda;
00121     a_offset = 1 + a_dim1 * 1;
00122     a -= a_offset;
00123     --x;
00124     /* Initialization added by Elias to get rid of compiler warnings. */
00125     kx = 0;
00126     /* Function Body */
00127     info = 0;
00128     if (! template_blas_lsame(uplo, "U") && ! template_blas_lsame(uplo, "L")) {
00129         info = 1;
00130     } else if (! template_blas_lsame(trans, "N") && ! template_blas_lsame(trans, 
00131             "T") && ! template_blas_lsame(trans, "C")) {
00132         info = 2;
00133     } else if (! template_blas_lsame(diag, "U") && ! template_blas_lsame(diag, 
00134             "N")) {
00135         info = 3;
00136     } else if (*n < 0) {
00137         info = 4;
00138     } else if (*lda < maxMACRO(1,*n)) {
00139         info = 6;
00140     } else if (*incx == 0) {
00141         info = 8;
00142     }
00143     if (info != 0) {
00144         template_blas_erbla("TRSV  ", &info);
00145         return 0;
00146     }
00147 /*     Quick return if possible. */
00148     if (*n == 0) {
00149         return 0;
00150     }
00151     nounit = template_blas_lsame(diag, "N");
00152 /*     Set up the start point in X if the increment is not unity. This   
00153        will be  ( N - 1 )*INCX  too small for descending loops. */
00154     if (*incx <= 0) {
00155         kx = 1 - (*n - 1) * *incx;
00156     } else if (*incx != 1) {
00157         kx = 1;
00158     }
00159 /*     Start the operations. In this version the elements of A are   
00160        accessed sequentially with one pass through A. */
00161     if (template_blas_lsame(trans, "N")) {
00162 /*        Form  x := inv( A )*x. */
00163         if (template_blas_lsame(uplo, "U")) {
00164             if (*incx == 1) {
00165                 for (j = *n; j >= 1; --j) {
00166                     if (x[j] != 0.) {
00167                         if (nounit) {
00168                             x[j] /= a_ref(j, j);
00169                         }
00170                         temp = x[j];
00171                         for (i__ = j - 1; i__ >= 1; --i__) {
00172                             x[i__] -= temp * a_ref(i__, j);
00173 /* L10: */
00174                         }
00175                     }
00176 /* L20: */
00177                 }
00178             } else {
00179                 jx = kx + (*n - 1) * *incx;
00180                 for (j = *n; j >= 1; --j) {
00181                     if (x[jx] != 0.) {
00182                         if (nounit) {
00183                             x[jx] /= a_ref(j, j);
00184                         }
00185                         temp = x[jx];
00186                         ix = jx;
00187                         for (i__ = j - 1; i__ >= 1; --i__) {
00188                             ix -= *incx;
00189                             x[ix] -= temp * a_ref(i__, j);
00190 /* L30: */
00191                         }
00192                     }
00193                     jx -= *incx;
00194 /* L40: */
00195                 }
00196             }
00197         } else {
00198             if (*incx == 1) {
00199                 i__1 = *n;
00200                 for (j = 1; j <= i__1; ++j) {
00201                     if (x[j] != 0.) {
00202                         if (nounit) {
00203                             x[j] /= a_ref(j, j);
00204                         }
00205                         temp = x[j];
00206                         i__2 = *n;
00207                         for (i__ = j + 1; i__ <= i__2; ++i__) {
00208                             x[i__] -= temp * a_ref(i__, j);
00209 /* L50: */
00210                         }
00211                     }
00212 /* L60: */
00213                 }
00214             } else {
00215                 jx = kx;
00216                 i__1 = *n;
00217                 for (j = 1; j <= i__1; ++j) {
00218                     if (x[jx] != 0.) {
00219                         if (nounit) {
00220                             x[jx] /= a_ref(j, j);
00221                         }
00222                         temp = x[jx];
00223                         ix = jx;
00224                         i__2 = *n;
00225                         for (i__ = j + 1; i__ <= i__2; ++i__) {
00226                             ix += *incx;
00227                             x[ix] -= temp * a_ref(i__, j);
00228 /* L70: */
00229                         }
00230                     }
00231                     jx += *incx;
00232 /* L80: */
00233                 }
00234             }
00235         }
00236     } else {
00237 /*        Form  x := inv( A' )*x. */
00238         if (template_blas_lsame(uplo, "U")) {
00239             if (*incx == 1) {
00240                 i__1 = *n;
00241                 for (j = 1; j <= i__1; ++j) {
00242                     temp = x[j];
00243                     i__2 = j - 1;
00244                     for (i__ = 1; i__ <= i__2; ++i__) {
00245                         temp -= a_ref(i__, j) * x[i__];
00246 /* L90: */
00247                     }
00248                     if (nounit) {
00249                         temp /= a_ref(j, j);
00250                     }
00251                     x[j] = temp;
00252 /* L100: */
00253                 }
00254             } else {
00255                 jx = kx;
00256                 i__1 = *n;
00257                 for (j = 1; j <= i__1; ++j) {
00258                     temp = x[jx];
00259                     ix = kx;
00260                     i__2 = j - 1;
00261                     for (i__ = 1; i__ <= i__2; ++i__) {
00262                         temp -= a_ref(i__, j) * x[ix];
00263                         ix += *incx;
00264 /* L110: */
00265                     }
00266                     if (nounit) {
00267                         temp /= a_ref(j, j);
00268                     }
00269                     x[jx] = temp;
00270                     jx += *incx;
00271 /* L120: */
00272                 }
00273             }
00274         } else {
00275             if (*incx == 1) {
00276                 for (j = *n; j >= 1; --j) {
00277                     temp = x[j];
00278                     i__1 = j + 1;
00279                     for (i__ = *n; i__ >= i__1; --i__) {
00280                         temp -= a_ref(i__, j) * x[i__];
00281 /* L130: */
00282                     }
00283                     if (nounit) {
00284                         temp /= a_ref(j, j);
00285                     }
00286                     x[j] = temp;
00287 /* L140: */
00288                 }
00289             } else {
00290                 kx += (*n - 1) * *incx;
00291                 jx = kx;
00292                 for (j = *n; j >= 1; --j) {
00293                     temp = x[jx];
00294                     ix = kx;
00295                     i__1 = j + 1;
00296                     for (i__ = *n; i__ >= i__1; --i__) {
00297                         temp -= a_ref(i__, j) * x[ix];
00298                         ix -= *incx;
00299 /* L150: */
00300                     }
00301                     if (nounit) {
00302                         temp /= a_ref(j, j);
00303                     }
00304                     x[jx] = temp;
00305                     jx -= *incx;
00306 /* L160: */
00307                 }
00308             }
00309         }
00310     }
00311     return 0;
00312 /*     End of DTRSV . */
00313 } /* dtrsv_ */
00314 #undef a_ref
00315 
00316 #endif