ergo
|
00001 /* Ergo, version 3.2, a program for linear scaling electronic structure 00002 * calculations. 00003 * Copyright (C) 2012 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek. 00004 * 00005 * This program is free software: you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation, either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program. If not, see <http://www.gnu.org/licenses/>. 00017 * 00018 * Primary academic reference: 00019 * KohnâSham Density Functional Theory Electronic Structure Calculations 00020 * with Linearly Scaling Computational Time and Memory Usage, 00021 * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek, 00022 * J. Chem. Theory Comput. 7, 340 (2011), 00023 * <http://dx.doi.org/10.1021/ct100611z> 00024 * 00025 * For further information about Ergo, see <http://www.ergoscf.org>. 00026 */ 00027 00028 /* This file belongs to the template_lapack part of the Ergo source 00029 * code. The source files in the template_lapack directory are modified 00030 * versions of files originally distributed as CLAPACK, see the 00031 * Copyright/license notice in the file template_lapack/COPYING. 00032 */ 00033 00034 00035 #ifndef TEMPLATE_LAPACK_LASQ5_HEADER 00036 #define TEMPLATE_LAPACK_LASQ5_HEADER 00037 00038 template<class Treal> 00039 int template_lapack_lasq5(integer *i0, integer *n0, Treal *z__, 00040 integer *pp, Treal *tau, Treal *dmin__, Treal *dmin1, 00041 Treal *dmin2, Treal *dn, Treal *dnm1, Treal *dnm2, 00042 logical *ieee) 00043 { 00044 /* System generated locals */ 00045 integer i__1; 00046 Treal d__1, d__2; 00047 00048 /* Local variables */ 00049 Treal d__; 00050 integer j4, j4p2; 00051 Treal emin, temp; 00052 00053 00054 /* -- LAPACK routine (version 3.2) -- */ 00055 00056 /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */ 00057 /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */ 00058 /* -- Berkeley -- */ 00059 /* -- November 2008 -- */ 00060 00061 /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ 00062 /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ 00063 00064 /* .. Scalar Arguments .. */ 00065 /* .. */ 00066 /* .. Array Arguments .. */ 00067 /* .. */ 00068 00069 /* Purpose */ 00070 /* ======= */ 00071 00072 /* DLASQ5 computes one dqds transform in ping-pong form, one */ 00073 /* version for IEEE machines another for non IEEE machines. */ 00074 00075 /* Arguments */ 00076 /* ========= */ 00077 00078 /* I0 (input) INTEGER */ 00079 /* First index. */ 00080 00081 /* N0 (input) INTEGER */ 00082 /* Last index. */ 00083 00084 /* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */ 00085 /* Z holds the qd array. EMIN is stored in Z(4*N0) to avoid */ 00086 /* an extra argument. */ 00087 00088 /* PP (input) INTEGER */ 00089 /* PP=0 for ping, PP=1 for pong. */ 00090 00091 /* TAU (input) DOUBLE PRECISION */ 00092 /* This is the shift. */ 00093 00094 /* DMIN (output) DOUBLE PRECISION */ 00095 /* Minimum value of d. */ 00096 00097 /* DMIN1 (output) DOUBLE PRECISION */ 00098 /* Minimum value of d, excluding D( N0 ). */ 00099 00100 /* DMIN2 (output) DOUBLE PRECISION */ 00101 /* Minimum value of d, excluding D( N0 ) and D( N0-1 ). */ 00102 00103 /* DN (output) DOUBLE PRECISION */ 00104 /* d(N0), the last value of d. */ 00105 00106 /* DNM1 (output) DOUBLE PRECISION */ 00107 /* d(N0-1). */ 00108 00109 /* DNM2 (output) DOUBLE PRECISION */ 00110 /* d(N0-2). */ 00111 00112 /* IEEE (input) LOGICAL */ 00113 /* Flag for IEEE or non IEEE arithmetic. */ 00114 00115 /* ===================================================================== */ 00116 00117 /* .. Parameter .. */ 00118 /* .. */ 00119 /* .. Local Scalars .. */ 00120 /* .. */ 00121 /* .. Intrinsic Functions .. */ 00122 /* .. */ 00123 /* .. Executable Statements .. */ 00124 00125 /* Parameter adjustments */ 00126 --z__; 00127 00128 /* Function Body */ 00129 if (*n0 - *i0 - 1 <= 0) { 00130 return 0; 00131 } 00132 00133 j4 = (*i0 << 2) + *pp - 3; 00134 emin = z__[j4 + 4]; 00135 d__ = z__[j4] - *tau; 00136 *dmin__ = d__; 00137 *dmin1 = -z__[j4]; 00138 00139 if (*ieee) { 00140 00141 /* Code for IEEE arithmetic. */ 00142 00143 if (*pp == 0) { 00144 i__1 = ( *n0 - 3 ) << 2; 00145 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { 00146 z__[j4 - 2] = d__ + z__[j4 - 1]; 00147 temp = z__[j4 + 1] / z__[j4 - 2]; 00148 d__ = d__ * temp - *tau; 00149 *dmin__ = minMACRO(*dmin__,d__); 00150 z__[j4] = z__[j4 - 1] * temp; 00151 /* Computing MIN */ 00152 d__1 = z__[j4]; 00153 emin = minMACRO(d__1,emin); 00154 /* L10: */ 00155 } 00156 } else { 00157 i__1 = ( *n0 - 3 ) << 2; 00158 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { 00159 z__[j4 - 3] = d__ + z__[j4]; 00160 temp = z__[j4 + 2] / z__[j4 - 3]; 00161 d__ = d__ * temp - *tau; 00162 *dmin__ = minMACRO(*dmin__,d__); 00163 z__[j4 - 1] = z__[j4] * temp; 00164 /* Computing MIN */ 00165 d__1 = z__[j4 - 1]; 00166 emin = minMACRO(d__1,emin); 00167 /* L20: */ 00168 } 00169 } 00170 00171 /* Unroll last two steps. */ 00172 00173 *dnm2 = d__; 00174 *dmin2 = *dmin__; 00175 j4 = ( ( *n0 - 2 ) << 2) - *pp; 00176 j4p2 = j4 + (*pp << 1) - 1; 00177 z__[j4 - 2] = *dnm2 + z__[j4p2]; 00178 z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); 00179 *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; 00180 *dmin__ = minMACRO(*dmin__,*dnm1); 00181 00182 *dmin1 = *dmin__; 00183 j4 += 4; 00184 j4p2 = j4 + (*pp << 1) - 1; 00185 z__[j4 - 2] = *dnm1 + z__[j4p2]; 00186 z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); 00187 *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; 00188 *dmin__ = minMACRO(*dmin__,*dn); 00189 00190 } else { 00191 00192 /* Code for non IEEE arithmetic. */ 00193 00194 if (*pp == 0) { 00195 i__1 = ( *n0 - 3 ) << 2; 00196 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { 00197 z__[j4 - 2] = d__ + z__[j4 - 1]; 00198 if (d__ < 0.) { 00199 return 0; 00200 } else { 00201 z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]); 00202 d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau; 00203 } 00204 *dmin__ = minMACRO(*dmin__,d__); 00205 /* Computing MIN */ 00206 d__1 = emin, d__2 = z__[j4]; 00207 emin = minMACRO(d__1,d__2); 00208 /* L30: */ 00209 } 00210 } else { 00211 i__1 = ( *n0 - 3 ) << 2; 00212 for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) { 00213 z__[j4 - 3] = d__ + z__[j4]; 00214 if (d__ < 0.) { 00215 return 0; 00216 } else { 00217 z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]); 00218 d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau; 00219 } 00220 *dmin__ = minMACRO(*dmin__,d__); 00221 /* Computing MIN */ 00222 d__1 = emin, d__2 = z__[j4 - 1]; 00223 emin = minMACRO(d__1,d__2); 00224 /* L40: */ 00225 } 00226 } 00227 00228 /* Unroll last two steps. */ 00229 00230 *dnm2 = d__; 00231 *dmin2 = *dmin__; 00232 j4 = ( ( *n0 - 2 ) << 2) - *pp; 00233 j4p2 = j4 + (*pp << 1) - 1; 00234 z__[j4 - 2] = *dnm2 + z__[j4p2]; 00235 if (*dnm2 < 0.) { 00236 return 0; 00237 } else { 00238 z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); 00239 *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau; 00240 } 00241 *dmin__ = minMACRO(*dmin__,*dnm1); 00242 00243 *dmin1 = *dmin__; 00244 j4 += 4; 00245 j4p2 = j4 + (*pp << 1) - 1; 00246 z__[j4 - 2] = *dnm1 + z__[j4p2]; 00247 if (*dnm1 < 0.) { 00248 return 0; 00249 } else { 00250 z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]); 00251 *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau; 00252 } 00253 *dmin__ = minMACRO(*dmin__,*dn); 00254 00255 } 00256 00257 z__[j4 + 2] = *dn; 00258 z__[(*n0 << 2) - *pp] = emin; 00259 return 0; 00260 00261 /* End of DLASQ5 */ 00262 00263 } /* dlasq5_ */ 00264 00265 #endif