ergo
template_lapack_sytrd.h
Go to the documentation of this file.
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure
00002  * calculations.
00003  * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
00004  * 
00005  * This program is free software: you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation, either version 3 of the License, or
00008  * (at your option) any later version.
00009  * 
00010  * This program is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  * GNU General Public License for more details.
00014  * 
00015  * You should have received a copy of the GNU General Public License
00016  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
00017  * 
00018  * Primary academic reference:
00019  * Kohn−Sham Density Functional Theory Electronic Structure Calculations 
00020  * with Linearly Scaling Computational Time and Memory Usage,
00021  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
00022  * J. Chem. Theory Comput. 7, 340 (2011),
00023  * <http://dx.doi.org/10.1021/ct100611z>
00024  * 
00025  * For further information about Ergo, see <http://www.ergoscf.org>.
00026  */
00027  
00028  /* This file belongs to the template_lapack part of the Ergo source 
00029   * code. The source files in the template_lapack directory are modified
00030   * versions of files originally distributed as CLAPACK, see the
00031   * Copyright/license notice in the file template_lapack/COPYING.
00032   */
00033  
00034 
00035 #ifndef TEMPLATE_LAPACK_SYTRD_HEADER
00036 #define TEMPLATE_LAPACK_SYTRD_HEADER
00037 
00038 #include "template_lapack_common.h"
00039 
00040 template<class Treal>
00041 int template_lapack_sytrd(const char *uplo, const integer *n, Treal *a, const integer *
00042         lda, Treal *d__, Treal *e, Treal *tau, Treal *
00043         work, const integer *lwork, integer *info)
00044 {
00045 /*  -- LAPACK routine (version 3.0) --   
00046        Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
00047        Courant Institute, Argonne National Lab, and Rice University   
00048        June 30, 1999   
00049 
00050 
00051     Purpose   
00052     =======   
00053 
00054     DSYTRD reduces a real symmetric matrix A to real symmetric   
00055     tridiagonal form T by an orthogonal similarity transformation:   
00056     Q**T * A * Q = T.   
00057 
00058     Arguments   
00059     =========   
00060 
00061     UPLO    (input) CHARACTER*1   
00062             = 'U':  Upper triangle of A is stored;   
00063             = 'L':  Lower triangle of A is stored.   
00064 
00065     N       (input) INTEGER   
00066             The order of the matrix A.  N >= 0.   
00067 
00068     A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
00069             On entry, the symmetric matrix A.  If UPLO = 'U', the leading   
00070             N-by-N upper triangular part of A contains the upper   
00071             triangular part of the matrix A, and the strictly lower   
00072             triangular part of A is not referenced.  If UPLO = 'L', the   
00073             leading N-by-N lower triangular part of A contains the lower   
00074             triangular part of the matrix A, and the strictly upper   
00075             triangular part of A is not referenced.   
00076             On exit, if UPLO = 'U', the diagonal and first superdiagonal   
00077             of A are overwritten by the corresponding elements of the   
00078             tridiagonal matrix T, and the elements above the first   
00079             superdiagonal, with the array TAU, represent the orthogonal   
00080             matrix Q as a product of elementary reflectors; if UPLO   
00081             = 'L', the diagonal and first subdiagonal of A are over-   
00082             written by the corresponding elements of the tridiagonal   
00083             matrix T, and the elements below the first subdiagonal, with   
00084             the array TAU, represent the orthogonal matrix Q as a product   
00085             of elementary reflectors. See Further Details.   
00086 
00087     LDA     (input) INTEGER   
00088             The leading dimension of the array A.  LDA >= max(1,N).   
00089 
00090     D       (output) DOUBLE PRECISION array, dimension (N)   
00091             The diagonal elements of the tridiagonal matrix T:   
00092             D(i) = A(i,i).   
00093 
00094     E       (output) DOUBLE PRECISION array, dimension (N-1)   
00095             The off-diagonal elements of the tridiagonal matrix T:   
00096             E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.   
00097 
00098     TAU     (output) DOUBLE PRECISION array, dimension (N-1)   
00099             The scalar factors of the elementary reflectors (see Further   
00100             Details).   
00101 
00102     WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)   
00103             On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   
00104 
00105     LWORK   (input) INTEGER   
00106             The dimension of the array WORK.  LWORK >= 1.   
00107             For optimum performance LWORK >= N*NB, where NB is the   
00108             optimal blocksize.   
00109 
00110             If LWORK = -1, then a workspace query is assumed; the routine   
00111             only calculates the optimal size of the WORK array, returns   
00112             this value as the first entry of the WORK array, and no error   
00113             message related to LWORK is issued by XERBLA.   
00114 
00115     INFO    (output) INTEGER   
00116             = 0:  successful exit   
00117             < 0:  if INFO = -i, the i-th argument had an illegal value   
00118 
00119     Further Details   
00120     ===============   
00121 
00122     If UPLO = 'U', the matrix Q is represented as a product of elementary   
00123     reflectors   
00124 
00125        Q = H(n-1) . . . H(2) H(1).   
00126 
00127     Each H(i) has the form   
00128 
00129        H(i) = I - tau * v * v'   
00130 
00131     where tau is a real scalar, and v is a real vector with   
00132     v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in   
00133     A(1:i-1,i+1), and tau in TAU(i).   
00134 
00135     If UPLO = 'L', the matrix Q is represented as a product of elementary   
00136     reflectors   
00137 
00138        Q = H(1) H(2) . . . H(n-1).   
00139 
00140     Each H(i) has the form   
00141 
00142        H(i) = I - tau * v * v'   
00143 
00144     where tau is a real scalar, and v is a real vector with   
00145     v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),   
00146     and tau in TAU(i).   
00147 
00148     The contents of A on exit are illustrated by the following examples   
00149     with n = 5:   
00150 
00151     if UPLO = 'U':                       if UPLO = 'L':   
00152 
00153       (  d   e   v2  v3  v4 )              (  d                  )   
00154       (      d   e   v3  v4 )              (  e   d              )   
00155       (          d   e   v4 )              (  v1  e   d          )   
00156       (              d   e  )              (  v1  v2  e   d      )   
00157       (                  d  )              (  v1  v2  v3  e   d  )   
00158 
00159     where d and e denote diagonal and off-diagonal elements of T, and vi   
00160     denotes an element of the vector defining H(i).   
00161 
00162     =====================================================================   
00163 
00164 
00165        Test the input parameters   
00166 
00167        Parameter adjustments */
00168     /* Table of constant values */
00169      integer c__1 = 1;
00170      integer c_n1 = -1;
00171      integer c__3 = 3;
00172      integer c__2 = 2;
00173      Treal c_b22 = -1.;
00174      Treal c_b23 = 1.;
00175     
00176     /* System generated locals */
00177     integer a_dim1, a_offset, i__1, i__2, i__3;
00178     /* Local variables */
00179      integer i__, j;
00180      integer nbmin, iinfo;
00181      logical upper;
00182      integer nb, kk, nx;
00183      integer ldwork, lwkopt;
00184      logical lquery;
00185      integer iws;
00186 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
00187 
00188 
00189     a_dim1 = *lda;
00190     a_offset = 1 + a_dim1 * 1;
00191     a -= a_offset;
00192     --d__;
00193     --e;
00194     --tau;
00195     --work;
00196 
00197     /* Initialization added by Elias to get rid of compiler warnings. */
00198     lwkopt = 0;
00199     /* Function Body */
00200     *info = 0;
00201     upper = template_blas_lsame(uplo, "U");
00202     lquery = *lwork == -1;
00203     if (! upper && ! template_blas_lsame(uplo, "L")) {
00204         *info = -1;
00205     } else if (*n < 0) {
00206         *info = -2;
00207     } else if (*lda < maxMACRO(1,*n)) {
00208         *info = -4;
00209     } else if (*lwork < 1 && ! lquery) {
00210         *info = -9;
00211     }
00212 
00213     if (*info == 0) {
00214 
00215 /*        Determine the block size. */
00216 
00217         nb = template_lapack_ilaenv(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
00218                  (ftnlen)1);
00219         lwkopt = *n * nb;
00220         work[1] = (Treal) lwkopt;
00221     }
00222 
00223     if (*info != 0) {
00224         i__1 = -(*info);
00225         template_blas_erbla("SYTRD ", &i__1);
00226         return 0;
00227     } else if (lquery) {
00228         return 0;
00229     }
00230 
00231 /*     Quick return if possible */
00232 
00233     if (*n == 0) {
00234         work[1] = 1.;
00235         return 0;
00236     }
00237 
00238     nx = *n;
00239     iws = 1;
00240     if (nb > 1 && nb < *n) {
00241 
00242 /*        Determine when to cross over from blocked to unblocked code   
00243           (last block is always handled by unblocked code).   
00244 
00245    Computing MAX */
00246         i__1 = nb, i__2 = template_lapack_ilaenv(&c__3, "DSYTRD", uplo, n, &c_n1, &c_n1, &
00247                 c_n1, (ftnlen)6, (ftnlen)1);
00248         nx = maxMACRO(i__1,i__2);
00249         if (nx < *n) {
00250 
00251 /*           Determine if workspace is large enough for blocked code. */
00252 
00253             ldwork = *n;
00254             iws = ldwork * nb;
00255             if (*lwork < iws) {
00256 
00257 /*              Not enough workspace to use optimal NB:  determine the   
00258                 minimum value of NB, and reduce NB or force use of   
00259                 unblocked code by setting NX = N.   
00260 
00261    Computing MAX */
00262                 i__1 = *lwork / ldwork;
00263                 nb = maxMACRO(i__1,1);
00264                 nbmin = template_lapack_ilaenv(&c__2, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1,
00265                          (ftnlen)6, (ftnlen)1);
00266                 if (nb < nbmin) {
00267                     nx = *n;
00268                 }
00269             }
00270         } else {
00271             nx = *n;
00272         }
00273     } else {
00274         nb = 1;
00275     }
00276 
00277     if (upper) {
00278 
00279 /*        Reduce the upper triangle of A.   
00280           Columns 1:kk are handled by the unblocked method. */
00281 
00282         kk = *n - (*n - nx + nb - 1) / nb * nb;
00283         i__1 = kk + 1;
00284         i__2 = -nb;
00285         for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += 
00286                 i__2) {
00287 
00288 /*           Reduce columns i:i+nb-1 to tridiagonal form and form the   
00289              matrix W which is needed to update the unreduced part of   
00290              the matrix */
00291 
00292             i__3 = i__ + nb - 1;
00293             template_lapack_latrd(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], &
00294                     work[1], &ldwork);
00295 
00296 /*           Update the unreduced submatrix A(1:i-1,1:i-1), using an   
00297              update of the form:  A := A - V*W' - W*V' */
00298 
00299             i__3 = i__ - 1;
00300             template_blas_syr2k(uplo, "No transpose", &i__3, &nb, &c_b22, &a_ref(1, i__), 
00301                     lda, &work[1], &ldwork, &c_b23, &a[a_offset], lda);
00302 
00303 /*           Copy superdiagonal elements back into A, and diagonal   
00304              elements into D */
00305 
00306             i__3 = i__ + nb - 1;
00307             for (j = i__; j <= i__3; ++j) {
00308                 a_ref(j - 1, j) = e[j - 1];
00309                 d__[j] = a_ref(j, j);
00310 /* L10: */
00311             }
00312 /* L20: */
00313         }
00314 
00315 /*        Use unblocked code to reduce the last or only block */
00316 
00317         template_lapack_sytd2(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo);
00318     } else {
00319 
00320 /*        Reduce the lower triangle of A */
00321 
00322         i__2 = *n - nx;
00323         i__1 = nb;
00324         for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
00325 
00326 /*           Reduce columns i:i+nb-1 to tridiagonal form and form the   
00327              matrix W which is needed to update the unreduced part of   
00328              the matrix */
00329 
00330             i__3 = *n - i__ + 1;
00331             template_lapack_latrd(uplo, &i__3, &nb, &a_ref(i__, i__), lda, &e[i__], &tau[
00332                     i__], &work[1], &ldwork);
00333 
00334 /*           Update the unreduced submatrix A(i+ib:n,i+ib:n), using   
00335              an update of the form:  A := A - V*W' - W*V' */
00336 
00337             i__3 = *n - i__ - nb + 1;
00338             template_blas_syr2k(uplo, "No transpose", &i__3, &nb, &c_b22, &a_ref(i__ + nb,
00339                      i__), lda, &work[nb + 1], &ldwork, &c_b23, &a_ref(i__ + 
00340                     nb, i__ + nb), lda);
00341 
00342 /*           Copy subdiagonal elements back into A, and diagonal   
00343              elements into D */
00344 
00345             i__3 = i__ + nb - 1;
00346             for (j = i__; j <= i__3; ++j) {
00347                 a_ref(j + 1, j) = e[j];
00348                 d__[j] = a_ref(j, j);
00349 /* L30: */
00350             }
00351 /* L40: */
00352         }
00353 
00354 /*        Use unblocked code to reduce the last or only block */
00355 
00356         i__1 = *n - i__ + 1;
00357         template_lapack_sytd2(uplo, &i__1, &a_ref(i__, i__), lda, &d__[i__], &e[i__], &tau[
00358                 i__], &iinfo);
00359     }
00360 
00361     work[1] = (Treal) lwkopt;
00362     return 0;
00363 
00364 /*     End of DSYTRD */
00365 
00366 } /* dsytrd_ */
00367 
00368 #undef a_ref
00369 
00370 
00371 #endif