ergo
template_lapack_lasq3.h
Go to the documentation of this file.
1 /* Ergo, version 3.7, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2018 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31  * code. The source files in the template_lapack directory are modified
32  * versions of files originally distributed as CLAPACK, see the
33  * Copyright/license notice in the file template_lapack/COPYING.
34  */
35 
36 
37 #ifndef TEMPLATE_LAPACK_LASQ3_HEADER
38 #define TEMPLATE_LAPACK_LASQ3_HEADER
39 
40 
41 #include "template_lapack_lasq4.h"
42 #include "template_lapack_lasq5.h"
43 #include "template_lapack_lasq6.h"
44 
45 
46 template<class Treal>
47 int template_lapack_lasq3(integer *i0, integer *n0, Treal *z__,
48  integer *pp, Treal *dmin__, Treal *sigma, Treal *desig,
49  Treal *qmax, integer *nfail, integer *iter, integer *ndiv,
50  logical *ieee, integer *ttype, Treal *dmin1, Treal *dmin2,
51  Treal *dn, Treal *dn1, Treal *dn2, Treal *g,
52  Treal *tau)
53 {
54  /* System generated locals */
55  integer i__1;
56  Treal d__1, d__2;
57 
58 
59  /* Local variables */
60  Treal s, t;
61  integer j4, nn;
62  Treal eps, tol;
63  integer n0in, ipn4;
64  Treal tol2, temp;
65 
66 
67 /* -- LAPACK routine (version 3.2) -- */
68 
69 /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */
70 /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */
71 /* -- Berkeley -- */
72 /* -- November 2008 -- */
73 
74 /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
75 /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
76 
77 /* .. Scalar Arguments .. */
78 /* .. */
79 /* .. Array Arguments .. */
80 /* .. */
81 
82 /* Purpose */
83 /* ======= */
84 
85 /* DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. */
86 /* In case of failure it changes shifts, and tries again until output */
87 /* is positive. */
88 
89 /* Arguments */
90 /* ========= */
91 
92 /* I0 (input) INTEGER */
93 /* First index. */
94 
95 /* N0 (input) INTEGER */
96 /* Last index. */
97 
98 /* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */
99 /* Z holds the qd array. */
100 
101 /* PP (input/output) INTEGER */
102 /* PP=0 for ping, PP=1 for pong. */
103 /* PP=2 indicates that flipping was applied to the Z array */
104 /* and that the initial tests for deflation should not be */
105 /* performed. */
106 
107 /* DMIN (output) DOUBLE PRECISION */
108 /* Minimum value of d. */
109 
110 /* SIGMA (output) DOUBLE PRECISION */
111 /* Sum of shifts used in current segment. */
112 
113 /* DESIG (input/output) DOUBLE PRECISION */
114 /* Lower order part of SIGMA */
115 
116 /* QMAX (input) DOUBLE PRECISION */
117 /* Maximum value of q. */
118 
119 /* NFAIL (output) INTEGER */
120 /* Number of times shift was too big. */
121 
122 /* ITER (output) INTEGER */
123 /* Number of iterations. */
124 
125 /* NDIV (output) INTEGER */
126 /* Number of divisions. */
127 
128 /* IEEE (input) LOGICAL */
129 /* Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). */
130 
131 /* TTYPE (input/output) INTEGER */
132 /* Shift type. */
133 
134 /* DMIN1, DMIN2, DN, DN1, DN2, G, TAU (input/output) DOUBLE PRECISION */
135 /* These are passed as arguments in order to save their values */
136 /* between calls to DLASQ3. */
137 
138 /* ===================================================================== */
139 
140 /* .. Parameters .. */
141 /* .. */
142 /* .. Local Scalars .. */
143 /* .. */
144 /* .. External Subroutines .. */
145 /* .. */
146 /* .. External Function .. */
147 /* .. */
148 /* .. Intrinsic Functions .. */
149 /* .. */
150 /* .. Executable Statements .. */
151 
152  /* Parameter adjustments */
153  --z__;
154 
155  /* Function Body */
156  n0in = *n0;
157  eps = template_lapack_lamch("Precision", (Treal)0);
158  tol = eps * 100.;
159 /* Computing 2nd power */
160  d__1 = tol;
161  tol2 = d__1 * d__1;
162 
163 /* Check for deflation. */
164 
165 L10:
166 
167  if (*n0 < *i0) {
168  return 0;
169  }
170  if (*n0 == *i0) {
171  goto L20;
172  }
173  nn = (*n0 << 2) + *pp;
174  if (*n0 == *i0 + 1) {
175  goto L40;
176  }
177 
178 /* Check whether E(N0-1) is negligible, 1 eigenvalue. */
179 
180  if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) -
181  4] > tol2 * z__[nn - 7]) {
182  goto L30;
183  }
184 
185 L20:
186 
187  z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma;
188  --(*n0);
189  goto L10;
190 
191 /* Check whether E(N0-2) is negligible, 2 eigenvalues. */
192 
193 L30:
194 
195  if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[
196  nn - 11]) {
197  goto L50;
198  }
199 
200 L40:
201 
202  if (z__[nn - 3] > z__[nn - 7]) {
203  s = z__[nn - 3];
204  z__[nn - 3] = z__[nn - 7];
205  z__[nn - 7] = s;
206  }
207  if (z__[nn - 5] > z__[nn - 3] * tol2) {
208  t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5;
209  s = z__[nn - 3] * (z__[nn - 5] / t);
210  if (s <= t) {
211  s = z__[nn - 3] * (z__[nn - 5] / (t * (template_blas_sqrt(s / t + 1.) + 1.)));
212  } else {
213  s = z__[nn - 3] * (z__[nn - 5] / (t + template_blas_sqrt(t) * template_blas_sqrt(t + s)));
214  }
215  t = z__[nn - 7] + (s + z__[nn - 5]);
216  z__[nn - 3] *= z__[nn - 7] / t;
217  z__[nn - 7] = t;
218  }
219  z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma;
220  z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma;
221  *n0 += -2;
222  goto L10;
223 
224 L50:
225  if (*pp == 2) {
226  *pp = 0;
227  }
228 
229 /* Reverse the qd-array, if warranted. */
230 
231  if (*dmin__ <= 0. || *n0 < n0in) {
232  if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) {
233  ipn4 = ( *i0 + *n0 ) << 2;
234  i__1 = ( *i0 + *n0 - 1 ) << 1;
235  for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
236  temp = z__[j4 - 3];
237  z__[j4 - 3] = z__[ipn4 - j4 - 3];
238  z__[ipn4 - j4 - 3] = temp;
239  temp = z__[j4 - 2];
240  z__[j4 - 2] = z__[ipn4 - j4 - 2];
241  z__[ipn4 - j4 - 2] = temp;
242  temp = z__[j4 - 1];
243  z__[j4 - 1] = z__[ipn4 - j4 - 5];
244  z__[ipn4 - j4 - 5] = temp;
245  temp = z__[j4];
246  z__[j4] = z__[ipn4 - j4 - 4];
247  z__[ipn4 - j4 - 4] = temp;
248 /* L60: */
249  }
250  if (*n0 - *i0 <= 4) {
251  z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1];
252  z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp];
253  }
254 /* Computing MIN */
255  d__1 = *dmin2, d__2 = z__[(*n0 << 2) + *pp - 1];
256  *dmin2 = minMACRO(d__1,d__2);
257 /* Computing MIN */
258  d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1]
259  , d__1 = minMACRO(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3];
260  z__[(*n0 << 2) + *pp - 1] = minMACRO(d__1,d__2);
261 /* Computing MIN */
262  d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 =
263  minMACRO(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4];
264  z__[(*n0 << 2) - *pp] = minMACRO(d__1,d__2);
265 /* Computing MAX */
266  d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = maxMACRO(d__1,
267  d__2), d__2 = z__[(*i0 << 2) + *pp + 1];
268  *qmax = maxMACRO(d__1,d__2);
269  *dmin__ = -0.;
270  }
271  }
272 
273 /* Choose a shift. */
274 
275  template_lapack_lasq4(i0, n0, &z__[1], pp, &n0in, dmin__, dmin1, dmin2, dn, dn1, dn2,
276  tau, ttype, g);
277 
278 /* Call dqds until DMIN > 0. */
279 
280 L70:
281 
282  template_lapack_lasq5(i0, n0, &z__[1], pp, tau, dmin__, dmin1, dmin2, dn, dn1, dn2,
283  ieee);
284 
285  *ndiv += *n0 - *i0 + 2;
286  ++(*iter);
287 
288 /* Check status. */
289 
290  if (*dmin__ >= 0. && *dmin1 > 0.) {
291 
292 /* Success. */
293 
294  goto L90;
295 
296  } else if (*dmin__ < 0. && *dmin1 > 0. && z__[( ( *n0 - 1 ) << 2) - *pp] < tol
297  * (*sigma + *dn1) && absMACRO(*dn) < tol * *sigma) {
298 
299 /* Convergence hidden by negative DN. */
300 
301  z__[( ( *n0 - 1 ) << 2) - *pp + 2] = 0.;
302  *dmin__ = 0.;
303  goto L90;
304  } else if (*dmin__ < 0.) {
305 
306 /* TAU too big. Select new TAU and try again. */
307 
308  ++(*nfail);
309  if (*ttype < -22) {
310 
311 /* Failed twice. Play it safe. */
312 
313  *tau = 0.;
314  } else if (*dmin1 > 0.) {
315 
316 /* Late failure. Gives excellent shift. */
317 
318  *tau = (*tau + *dmin__) * (1. - eps * 2.);
319  *ttype += -11;
320  } else {
321 
322 /* Early failure. Divide by 4. */
323 
324  *tau *= .25;
325  *ttype += -12;
326  }
327  goto L70;
328  } else if (template_lapack_isnan(dmin__)) {
329 
330 /* NaN. */
331 
332  if (*tau == 0.) {
333  goto L80;
334  } else {
335  *tau = 0.;
336  goto L70;
337  }
338  } else {
339 
340 /* Possible underflow. Play it safe. */
341 
342  goto L80;
343  }
344 
345 /* Risk of underflow. */
346 
347 L80:
348  template_lapack_lasq6(i0, n0, &z__[1], pp, dmin__, dmin1, dmin2, dn, dn1, dn2);
349  *ndiv += *n0 - *i0 + 2;
350  ++(*iter);
351  *tau = 0.;
352 
353 L90:
354  if (*tau < *sigma) {
355  *desig += *tau;
356  t = *sigma + *desig;
357  *desig -= t - *sigma;
358  } else {
359  t = *sigma + *tau;
360  *desig = *sigma - (t - *tau) + *desig;
361  }
362  *sigma = t;
363 
364  return 0;
365 
366 /* End of DLASQ3 */
367 
368 } /* dlasq3_ */
369 
370 #endif
#define absMACRO(x)
Definition: template_blas_common.h:47
logical template_lapack_isnan(Treal *din)
Definition: template_lapack_isnan.h:45
int integer
Definition: template_blas_common.h:40
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
int template_lapack_lasq6(integer *i0, integer *n0, Treal *z__, integer *pp, Treal *dmin__, Treal *dmin1, Treal *dmin2, Treal *dn, Treal *dnm1, Treal *dnm2)
Definition: template_lapack_lasq6.h:41
#define minMACRO(a, b)
Definition: template_blas_common.h:46
int template_lapack_lasq5(integer *i0, integer *n0, Treal *z__, integer *pp, Treal *tau, Treal *dmin__, Treal *dmin1, Treal *dmin2, Treal *dn, Treal *dnm1, Treal *dnm2, logical *ieee)
Definition: template_lapack_lasq5.h:41
int template_lapack_lasq3(integer *i0, integer *n0, Treal *z__, integer *pp, Treal *dmin__, Treal *sigma, Treal *desig, Treal *qmax, integer *nfail, integer *iter, integer *ndiv, logical *ieee, integer *ttype, Treal *dmin1, Treal *dmin2, Treal *dn, Treal *dn1, Treal *dn2, Treal *g, Treal *tau)
Definition: template_lapack_lasq3.h:47
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:202
bool logical
Definition: template_blas_common.h:41
Treal template_blas_sqrt(Treal x)
int template_lapack_lasq4(integer *i0, integer *n0, Treal *z__, integer *pp, integer *n0in, Treal *dmin__, Treal *dmin1, Treal *dmin2, Treal *dn, Treal *dn1, Treal *dn2, Treal *tau, integer *ttype, Treal *g)
Definition: template_lapack_lasq4.h:41