ergo
template_lapack_lasq4.h
Go to the documentation of this file.
1 /* Ergo, version 3.7, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2018 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31  * code. The source files in the template_lapack directory are modified
32  * versions of files originally distributed as CLAPACK, see the
33  * Copyright/license notice in the file template_lapack/COPYING.
34  */
35 
36 
37 #ifndef TEMPLATE_LAPACK_LASQ4_HEADER
38 #define TEMPLATE_LAPACK_LASQ4_HEADER
39 
40 template<class Treal>
41 int template_lapack_lasq4(integer *i0, integer *n0, Treal *z__,
42  integer *pp, integer *n0in, Treal *dmin__, Treal *dmin1,
43  Treal *dmin2, Treal *dn, Treal *dn1, Treal *dn2,
44  Treal *tau, integer *ttype, Treal *g)
45 {
46  /* System generated locals */
47  integer i__1;
48  Treal d__1, d__2;
49 
50 
51  /* Local variables */
52  Treal s = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
53  Treal a2, b1, b2;
54  integer i4, nn, np;
55  Treal gam, gap1, gap2;
56 
57 
58 /* -- LAPACK routine (version 3.2) -- */
59 
60 /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */
61 /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */
62 /* -- Berkeley -- */
63 /* -- November 2008 -- */
64 
65 /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
66 /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
67 
68 /* .. Scalar Arguments .. */
69 /* .. */
70 /* .. Array Arguments .. */
71 /* .. */
72 
73 /* Purpose */
74 /* ======= */
75 
76 /* DLASQ4 computes an approximation TAU to the smallest eigenvalue */
77 /* using values of d from the previous transform. */
78 
79 /* I0 (input) INTEGER */
80 /* First index. */
81 
82 /* N0 (input) INTEGER */
83 /* Last index. */
84 
85 /* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */
86 /* Z holds the qd array. */
87 
88 /* PP (input) INTEGER */
89 /* PP=0 for ping, PP=1 for pong. */
90 
91 /* NOIN (input) INTEGER */
92 /* The value of N0 at start of EIGTEST. */
93 
94 /* DMIN (input) DOUBLE PRECISION */
95 /* Minimum value of d. */
96 
97 /* DMIN1 (input) DOUBLE PRECISION */
98 /* Minimum value of d, excluding D( N0 ). */
99 
100 /* DMIN2 (input) DOUBLE PRECISION */
101 /* Minimum value of d, excluding D( N0 ) and D( N0-1 ). */
102 
103 /* DN (input) DOUBLE PRECISION */
104 /* d(N) */
105 
106 /* DN1 (input) DOUBLE PRECISION */
107 /* d(N-1) */
108 
109 /* DN2 (input) DOUBLE PRECISION */
110 /* d(N-2) */
111 
112 /* TAU (output) DOUBLE PRECISION */
113 /* This is the shift. */
114 
115 /* TTYPE (output) INTEGER */
116 /* Shift type. */
117 
118 /* G (input/output) REAL */
119 /* G is passed as an argument in order to save its value between */
120 /* calls to DLASQ4. */
121 
122 /* Further Details */
123 /* =============== */
124 /* CNST1 = 9/16 */
125 
126 /* ===================================================================== */
127 
128 /* .. Parameters .. */
129 /* .. */
130 /* .. Local Scalars .. */
131 /* .. */
132 /* .. Intrinsic Functions .. */
133 /* .. */
134 /* .. Executable Statements .. */
135 
136 /* A negative DMIN forces the shift to take that absolute value */
137 /* TTYPE records the type of shift. */
138 
139  /* Parameter adjustments */
140  --z__;
141 
142  /* Function Body */
143  if (*dmin__ <= 0.) {
144  *tau = -(*dmin__);
145  *ttype = -1;
146  return 0;
147  }
148 
149  nn = (*n0 << 2) + *pp;
150  if (*n0in == *n0) {
151 
152 /* No eigenvalues deflated. */
153 
154  if (*dmin__ == *dn || *dmin__ == *dn1) {
155 
156  b1 = template_blas_sqrt(z__[nn - 3]) * template_blas_sqrt(z__[nn - 5]);
157  b2 = template_blas_sqrt(z__[nn - 7]) * template_blas_sqrt(z__[nn - 9]);
158  a2 = z__[nn - 7] + z__[nn - 5];
159 
160 /* Cases 2 and 3. */
161 
162  if (*dmin__ == *dn && *dmin1 == *dn1) {
163  gap2 = *dmin2 - a2 - *dmin2 * .25;
164  if (gap2 > 0. && gap2 > b2) {
165  gap1 = a2 - *dn - b2 / gap2 * b2;
166  } else {
167  gap1 = a2 - *dn - (b1 + b2);
168  }
169  if (gap1 > 0. && gap1 > b1) {
170 /* Computing MAX */
171  d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5;
172  s = maxMACRO(d__1,d__2);
173  *ttype = -2;
174  } else {
175  s = 0.;
176  if (*dn > b1) {
177  s = *dn - b1;
178  }
179  if (a2 > b1 + b2) {
180 /* Computing MIN */
181  d__1 = s, d__2 = a2 - (b1 + b2);
182  s = minMACRO(d__1,d__2);
183  }
184 /* Computing MAX */
185  d__1 = s, d__2 = *dmin__ * .333;
186  s = maxMACRO(d__1,d__2);
187  *ttype = -3;
188  }
189  } else {
190 
191 /* Case 4. */
192 
193  *ttype = -4;
194  s = *dmin__ * .25;
195  if (*dmin__ == *dn) {
196  gam = *dn;
197  a2 = 0.;
198  if (z__[nn - 5] > z__[nn - 7]) {
199  return 0;
200  }
201  b2 = z__[nn - 5] / z__[nn - 7];
202  np = nn - 9;
203  } else {
204  np = nn - (*pp << 1);
205  b2 = z__[np - 2];
206  gam = *dn1;
207  if (z__[np - 4] > z__[np - 2]) {
208  return 0;
209  }
210  a2 = z__[np - 4] / z__[np - 2];
211  if (z__[nn - 9] > z__[nn - 11]) {
212  return 0;
213  }
214  b2 = z__[nn - 9] / z__[nn - 11];
215  np = nn - 13;
216  }
217 
218 /* Approximate contribution to norm squared from I < NN-1. */
219 
220  a2 += b2;
221  i__1 = (*i0 << 2) - 1 + *pp;
222  for (i4 = np; i4 >= i__1; i4 += -4) {
223  if (b2 == 0.) {
224  goto L20;
225  }
226  b1 = b2;
227  if (z__[i4] > z__[i4 - 2]) {
228  return 0;
229  }
230  b2 *= z__[i4] / z__[i4 - 2];
231  a2 += b2;
232  if (maxMACRO(b2,b1) * 100. < a2 || .563 < a2) {
233  goto L20;
234  }
235 /* L10: */
236  }
237 L20:
238  a2 *= 1.05;
239 
240 /* Rayleigh quotient residual bound. */
241 
242  if (a2 < .563) {
243  s = gam * (1. - template_blas_sqrt(a2)) / (a2 + 1.);
244  }
245  }
246  } else if (*dmin__ == *dn2) {
247 
248 /* Case 5. */
249 
250  *ttype = -5;
251  s = *dmin__ * .25;
252 
253 /* Compute contribution to norm squared from I > NN-2. */
254 
255  np = nn - (*pp << 1);
256  b1 = z__[np - 2];
257  b2 = z__[np - 6];
258  gam = *dn2;
259  if (z__[np - 8] > b2 || z__[np - 4] > b1) {
260  return 0;
261  }
262  a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.);
263 
264 /* Approximate contribution to norm squared from I < NN-2. */
265 
266  if (*n0 - *i0 > 2) {
267  b2 = z__[nn - 13] / z__[nn - 15];
268  a2 += b2;
269  i__1 = (*i0 << 2) - 1 + *pp;
270  for (i4 = nn - 17; i4 >= i__1; i4 += -4) {
271  if (b2 == 0.) {
272  goto L40;
273  }
274  b1 = b2;
275  if (z__[i4] > z__[i4 - 2]) {
276  return 0;
277  }
278  b2 *= z__[i4] / z__[i4 - 2];
279  a2 += b2;
280  if (maxMACRO(b2,b1) * 100. < a2 || .563 < a2) {
281  goto L40;
282  }
283 /* L30: */
284  }
285 L40:
286  a2 *= 1.05;
287  }
288 
289  if (a2 < .563) {
290  s = gam * (1. - template_blas_sqrt(a2)) / (a2 + 1.);
291  }
292  } else {
293 
294 /* Case 6, no information to guide us. */
295 
296  if (*ttype == -6) {
297  *g += (1. - *g) * .333;
298  } else if (*ttype == -18) {
299  *g = .083250000000000005;
300  } else {
301  *g = .25;
302  }
303  s = *g * *dmin__;
304  *ttype = -6;
305  }
306 
307  } else if (*n0in == *n0 + 1) {
308 
309 /* One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */
310 
311  if (*dmin1 == *dn1 && *dmin2 == *dn2) {
312 
313 /* Cases 7 and 8. */
314 
315  *ttype = -7;
316  s = *dmin1 * .333;
317  if (z__[nn - 5] > z__[nn - 7]) {
318  return 0;
319  }
320  b1 = z__[nn - 5] / z__[nn - 7];
321  b2 = b1;
322  if (b2 == 0.) {
323  goto L60;
324  }
325  i__1 = (*i0 << 2) - 1 + *pp;
326  for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) {
327  a2 = b1;
328  if (z__[i4] > z__[i4 - 2]) {
329  return 0;
330  }
331  b1 *= z__[i4] / z__[i4 - 2];
332  b2 += b1;
333  if (maxMACRO(b1,a2) * 100. < b2) {
334  goto L60;
335  }
336 /* L50: */
337  }
338 L60:
339  b2 = template_blas_sqrt(b2 * 1.05);
340 /* Computing 2nd power */
341  d__1 = b2;
342  a2 = *dmin1 / (d__1 * d__1 + 1.);
343  gap2 = *dmin2 * .5 - a2;
344  if (gap2 > 0. && gap2 > b2 * a2) {
345 /* Computing MAX */
346  d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2);
347  s = maxMACRO(d__1,d__2);
348  } else {
349 /* Computing MAX */
350  d__1 = s, d__2 = a2 * (1. - b2 * 1.01);
351  s = maxMACRO(d__1,d__2);
352  *ttype = -8;
353  }
354  } else {
355 
356 /* Case 9. */
357 
358  s = *dmin1 * .25;
359  if (*dmin1 == *dn1) {
360  s = *dmin1 * .5;
361  }
362  *ttype = -9;
363  }
364 
365  } else if (*n0in == *n0 + 2) {
366 
367 /* Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN. */
368 
369 /* Cases 10 and 11. */
370 
371  if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) {
372  *ttype = -10;
373  s = *dmin2 * .333;
374  if (z__[nn - 5] > z__[nn - 7]) {
375  return 0;
376  }
377  b1 = z__[nn - 5] / z__[nn - 7];
378  b2 = b1;
379  if (b2 == 0.) {
380  goto L80;
381  }
382  i__1 = (*i0 << 2) - 1 + *pp;
383  for (i4 = (*n0 << 2) - 9 + *pp; i4 >= i__1; i4 += -4) {
384  if (z__[i4] > z__[i4 - 2]) {
385  return 0;
386  }
387  b1 *= z__[i4] / z__[i4 - 2];
388  b2 += b1;
389  if (b1 * 100. < b2) {
390  goto L80;
391  }
392 /* L70: */
393  }
394 L80:
395  b2 = template_blas_sqrt(b2 * 1.05);
396 /* Computing 2nd power */
397  d__1 = b2;
398  a2 = *dmin2 / (d__1 * d__1 + 1.);
399  gap2 = z__[nn - 7] + z__[nn - 9] - template_blas_sqrt(z__[nn - 11]) * template_blas_sqrt(z__[
400  nn - 9]) - a2;
401  if (gap2 > 0. && gap2 > b2 * a2) {
402 /* Computing MAX */
403  d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2);
404  s = maxMACRO(d__1,d__2);
405  } else {
406 /* Computing MAX */
407  d__1 = s, d__2 = a2 * (1. - b2 * 1.01);
408  s = maxMACRO(d__1,d__2);
409  }
410  } else {
411  s = *dmin2 * .25;
412  *ttype = -11;
413  }
414  } else if (*n0in > *n0 + 2) {
415 
416 /* Case 12, more than two eigenvalues deflated. No information. */
417 
418  s = 0.;
419  *ttype = -12;
420  }
421 
422  *tau = s;
423  return 0;
424 
425 /* End of DLASQ4 */
426 
427 } /* dlasq4_ */
428 
429 #endif
int integer
Definition: template_blas_common.h:40
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
#define minMACRO(a, b)
Definition: template_blas_common.h:46
Treal template_blas_sqrt(Treal x)
int template_lapack_lasq4(integer *i0, integer *n0, Treal *z__, integer *pp, integer *n0in, Treal *dmin__, Treal *dmin1, Treal *dmin2, Treal *dn, Treal *dn1, Treal *dn2, Treal *tau, integer *ttype, Treal *g)
Definition: template_lapack_lasq4.h:41