ergo
template_lapack_lar1v.h
Go to the documentation of this file.
1 /* Ergo, version 3.7, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2018 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31  * code. The source files in the template_lapack directory are modified
32  * versions of files originally distributed as CLAPACK, see the
33  * Copyright/license notice in the file template_lapack/COPYING.
34  */
35 
36 
37 #ifndef TEMPLATE_LAPACK_LAR1V_HEADER
38 #define TEMPLATE_LAPACK_LAR1V_HEADER
39 
40 template<class Treal>
41 int template_lapack_lar1v(integer *n, integer *b1, integer *bn, Treal
42  *lambda, Treal *d__, Treal *l, Treal *ld, Treal *
43  lld, Treal *pivmin, Treal *gaptol, Treal *z__, logical
44  *wantnc, integer *negcnt, Treal *ztz, Treal *mingma,
45  integer *r__, integer *isuppz, Treal *nrminv, Treal *resid,
46  Treal *rqcorr, Treal *work)
47 {
48  /* System generated locals */
49  integer i__1;
50  Treal d__1, d__2, d__3;
51 
52 
53  /* Local variables */
54  integer i__;
55  Treal s;
56  integer r1, r2;
57  Treal eps, tmp;
58  integer neg1, neg2, indp, inds;
59  Treal dplus;
60  integer indlpl, indumn;
61  Treal dminus;
62  logical sawnan1, sawnan2;
63 
64 
65 /* -- LAPACK auxiliary routine (version 3.2) -- */
66 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
67 /* November 2006 */
68 
69 /* .. Scalar Arguments .. */
70 /* .. */
71 /* .. Array Arguments .. */
72 /* .. */
73 
74 /* Purpose */
75 /* ======= */
76 
77 /* DLAR1V computes the (scaled) r-th column of the inverse of */
78 /* the sumbmatrix in rows B1 through BN of the tridiagonal matrix */
79 /* L D L^T - sigma I. When sigma is close to an eigenvalue, the */
80 /* computed vector is an accurate eigenvector. Usually, r corresponds */
81 /* to the index where the eigenvector is largest in magnitude. */
82 /* The following steps accomplish this computation : */
83 /* (a) Stationary qd transform, L D L^T - sigma I = L(+) D(+) L(+)^T, */
84 /* (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T, */
85 /* (c) Computation of the diagonal elements of the inverse of */
86 /* L D L^T - sigma I by combining the above transforms, and choosing */
87 /* r as the index where the diagonal of the inverse is (one of the) */
88 /* largest in magnitude. */
89 /* (d) Computation of the (scaled) r-th column of the inverse using the */
90 /* twisted factorization obtained by combining the top part of the */
91 /* the stationary and the bottom part of the progressive transform. */
92 
93 /* Arguments */
94 /* ========= */
95 
96 /* N (input) INTEGER */
97 /* The order of the matrix L D L^T. */
98 
99 /* B1 (input) INTEGER */
100 /* First index of the submatrix of L D L^T. */
101 
102 /* BN (input) INTEGER */
103 /* Last index of the submatrix of L D L^T. */
104 
105 /* LAMBDA (input) DOUBLE PRECISION */
106 /* The shift. In order to compute an accurate eigenvector, */
107 /* LAMBDA should be a good approximation to an eigenvalue */
108 /* of L D L^T. */
109 
110 /* L (input) DOUBLE PRECISION array, dimension (N-1) */
111 /* The (n-1) subdiagonal elements of the unit bidiagonal matrix */
112 /* L, in elements 1 to N-1. */
113 
114 /* D (input) DOUBLE PRECISION array, dimension (N) */
115 /* The n diagonal elements of the diagonal matrix D. */
116 
117 /* LD (input) DOUBLE PRECISION array, dimension (N-1) */
118 /* The n-1 elements L(i)*D(i). */
119 
120 /* LLD (input) DOUBLE PRECISION array, dimension (N-1) */
121 /* The n-1 elements L(i)*L(i)*D(i). */
122 
123 /* PIVMIN (input) DOUBLE PRECISION */
124 /* The minimum pivot in the Sturm sequence. */
125 
126 /* GAPTOL (input) DOUBLE PRECISION */
127 /* Tolerance that indicates when eigenvector entries are negligible */
128 /* w.r.t. their contribution to the residual. */
129 
130 /* Z (input/output) DOUBLE PRECISION array, dimension (N) */
131 /* On input, all entries of Z must be set to 0. */
132 /* On output, Z contains the (scaled) r-th column of the */
133 /* inverse. The scaling is such that Z(R) equals 1. */
134 
135 /* WANTNC (input) LOGICAL */
136 /* Specifies whether NEGCNT has to be computed. */
137 
138 /* NEGCNT (output) INTEGER */
139 /* If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin */
140 /* in the matrix factorization L D L^T, and NEGCNT = -1 otherwise. */
141 
142 /* ZTZ (output) DOUBLE PRECISION */
143 /* The square of the 2-norm of Z. */
144 
145 /* MINGMA (output) DOUBLE PRECISION */
146 /* The reciprocal of the largest (in magnitude) diagonal */
147 /* element of the inverse of L D L^T - sigma I. */
148 
149 /* R (input/output) INTEGER */
150 /* The twist index for the twisted factorization used to */
151 /* compute Z. */
152 /* On input, 0 <= R <= N. If R is input as 0, R is set to */
153 /* the index where (L D L^T - sigma I)^{-1} is largest */
154 /* in magnitude. If 1 <= R <= N, R is unchanged. */
155 /* On output, R contains the twist index used to compute Z. */
156 /* Ideally, R designates the position of the maximum entry in the */
157 /* eigenvector. */
158 
159 /* ISUPPZ (output) INTEGER array, dimension (2) */
160 /* The support of the vector in Z, i.e., the vector Z is */
161 /* nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). */
162 
163 /* NRMINV (output) DOUBLE PRECISION */
164 /* NRMINV = 1/SQRT( ZTZ ) */
165 
166 /* RESID (output) DOUBLE PRECISION */
167 /* The residual of the FP vector. */
168 /* RESID = ABS( MINGMA )/SQRT( ZTZ ) */
169 
170 /* RQCORR (output) DOUBLE PRECISION */
171 /* The Rayleigh Quotient correction to LAMBDA. */
172 /* RQCORR = MINGMA*TMP */
173 
174 /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */
175 
176 /* Further Details */
177 /* =============== */
178 
179 /* Based on contributions by */
180 /* Beresford Parlett, University of California, Berkeley, USA */
181 /* Jim Demmel, University of California, Berkeley, USA */
182 /* Inderjit Dhillon, University of Texas, Austin, USA */
183 /* Osni Marques, LBNL/NERSC, USA */
184 /* Christof Voemel, University of California, Berkeley, USA */
185 
186 /* ===================================================================== */
187 
188 /* .. Parameters .. */
189 /* .. */
190 /* .. Local Scalars .. */
191 /* .. */
192 /* .. External Functions .. */
193 /* .. */
194 /* .. Intrinsic Functions .. */
195 /* .. */
196 /* .. Executable Statements .. */
197 
198  /* Parameter adjustments */
199  --work;
200  --isuppz;
201  --z__;
202  --lld;
203  --ld;
204  --l;
205  --d__;
206 
207  /* Function Body */
208  eps = template_lapack_lamch("Precision", (Treal)0);
209  if (*r__ == 0) {
210  r1 = *b1;
211  r2 = *bn;
212  } else {
213  r1 = *r__;
214  r2 = *r__;
215  }
216 /* Storage for LPLUS */
217  indlpl = 0;
218 /* Storage for UMINUS */
219  indumn = *n;
220  inds = (*n << 1) + 1;
221  indp = *n * 3 + 1;
222  if (*b1 == 1) {
223  work[inds] = 0.;
224  } else {
225  work[inds + *b1 - 1] = lld[*b1 - 1];
226  }
227 
228 /* Compute the stationary transform (using the differential form) */
229 /* until the index R2. */
230 
231  sawnan1 = FALSE_;
232  neg1 = 0;
233  s = work[inds + *b1 - 1] - *lambda;
234  i__1 = r1 - 1;
235  for (i__ = *b1; i__ <= i__1; ++i__) {
236  dplus = d__[i__] + s;
237  work[indlpl + i__] = ld[i__] / dplus;
238  if (dplus < 0.) {
239  ++neg1;
240  }
241  work[inds + i__] = s * work[indlpl + i__] * l[i__];
242  s = work[inds + i__] - *lambda;
243 /* L50: */
244  }
245  sawnan1 = template_lapack_isnan(&s);
246  if (sawnan1) {
247  goto L60;
248  }
249  i__1 = r2 - 1;
250  for (i__ = r1; i__ <= i__1; ++i__) {
251  dplus = d__[i__] + s;
252  work[indlpl + i__] = ld[i__] / dplus;
253  work[inds + i__] = s * work[indlpl + i__] * l[i__];
254  s = work[inds + i__] - *lambda;
255 /* L51: */
256  }
257  sawnan1 = template_lapack_isnan(&s);
258 
259 L60:
260  if (sawnan1) {
261 /* Runs a slower version of the above loop if a NaN is detected */
262  neg1 = 0;
263  s = work[inds + *b1 - 1] - *lambda;
264  i__1 = r1 - 1;
265  for (i__ = *b1; i__ <= i__1; ++i__) {
266  dplus = d__[i__] + s;
267  if (absMACRO(dplus) < *pivmin) {
268  dplus = -(*pivmin);
269  }
270  work[indlpl + i__] = ld[i__] / dplus;
271  if (dplus < 0.) {
272  ++neg1;
273  }
274  work[inds + i__] = s * work[indlpl + i__] * l[i__];
275  if (work[indlpl + i__] == 0.) {
276  work[inds + i__] = lld[i__];
277  }
278  s = work[inds + i__] - *lambda;
279 /* L70: */
280  }
281  i__1 = r2 - 1;
282  for (i__ = r1; i__ <= i__1; ++i__) {
283  dplus = d__[i__] + s;
284  if (absMACRO(dplus) < *pivmin) {
285  dplus = -(*pivmin);
286  }
287  work[indlpl + i__] = ld[i__] / dplus;
288  work[inds + i__] = s * work[indlpl + i__] * l[i__];
289  if (work[indlpl + i__] == 0.) {
290  work[inds + i__] = lld[i__];
291  }
292  s = work[inds + i__] - *lambda;
293 /* L71: */
294  }
295  }
296 
297 /* Compute the progressive transform (using the differential form) */
298 /* until the index R1 */
299 
300  sawnan2 = FALSE_;
301  neg2 = 0;
302  work[indp + *bn - 1] = d__[*bn] - *lambda;
303  i__1 = r1;
304  for (i__ = *bn - 1; i__ >= i__1; --i__) {
305  dminus = lld[i__] + work[indp + i__];
306  tmp = d__[i__] / dminus;
307  if (dminus < 0.) {
308  ++neg2;
309  }
310  work[indumn + i__] = l[i__] * tmp;
311  work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda;
312 /* L80: */
313  }
314  tmp = work[indp + r1 - 1];
315  sawnan2 = template_lapack_isnan(&tmp);
316  if (sawnan2) {
317 /* Runs a slower version of the above loop if a NaN is detected */
318  neg2 = 0;
319  i__1 = r1;
320  for (i__ = *bn - 1; i__ >= i__1; --i__) {
321  dminus = lld[i__] + work[indp + i__];
322  if (absMACRO(dminus) < *pivmin) {
323  dminus = -(*pivmin);
324  }
325  tmp = d__[i__] / dminus;
326  if (dminus < 0.) {
327  ++neg2;
328  }
329  work[indumn + i__] = l[i__] * tmp;
330  work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda;
331  if (tmp == 0.) {
332  work[indp + i__ - 1] = d__[i__] - *lambda;
333  }
334 /* L100: */
335  }
336  }
337 
338 /* Find the index (from R1 to R2) of the largest (in magnitude) */
339 /* diagonal element of the inverse */
340 
341  *mingma = work[inds + r1 - 1] + work[indp + r1 - 1];
342  if (*mingma < 0.) {
343  ++neg1;
344  }
345  if (*wantnc) {
346  *negcnt = neg1 + neg2;
347  } else {
348  *negcnt = -1;
349  }
350  if (absMACRO(*mingma) == 0.) {
351  *mingma = eps * work[inds + r1 - 1];
352  }
353  *r__ = r1;
354  i__1 = r2 - 1;
355  for (i__ = r1; i__ <= i__1; ++i__) {
356  tmp = work[inds + i__] + work[indp + i__];
357  if (tmp == 0.) {
358  tmp = eps * work[inds + i__];
359  }
360  if (absMACRO(tmp) <= absMACRO(*mingma)) {
361  *mingma = tmp;
362  *r__ = i__ + 1;
363  }
364 /* L110: */
365  }
366 
367 /* Compute the FP vector: solve N^T v = e_r */
368 
369  isuppz[1] = *b1;
370  isuppz[2] = *bn;
371  z__[*r__] = 1.;
372  *ztz = 1.;
373 
374 /* Compute the FP vector upwards from R */
375 
376  if (! sawnan1 && ! sawnan2) {
377  i__1 = *b1;
378  for (i__ = *r__ - 1; i__ >= i__1; --i__) {
379  z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]);
380  if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO(
381  d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) {
382  z__[i__] = 0.;
383  isuppz[1] = i__ + 1;
384  goto L220;
385  }
386  *ztz += z__[i__] * z__[i__];
387 /* L210: */
388  }
389 L220:
390  ;
391  } else {
392 /* Run slower loop if NaN occurred. */
393  i__1 = *b1;
394  for (i__ = *r__ - 1; i__ >= i__1; --i__) {
395  if (z__[i__ + 1] == 0.) {
396  z__[i__] = -(ld[i__ + 1] / ld[i__]) * z__[i__ + 2];
397  } else {
398  z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]);
399  }
400  if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO(
401  d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) {
402  z__[i__] = 0.;
403  isuppz[1] = i__ + 1;
404  goto L240;
405  }
406  *ztz += z__[i__] * z__[i__];
407 /* L230: */
408  }
409 L240:
410  ;
411  }
412 /* Compute the FP vector downwards from R in blocks of size BLKSIZ */
413  if (! sawnan1 && ! sawnan2) {
414  i__1 = *bn - 1;
415  for (i__ = *r__; i__ <= i__1; ++i__) {
416  z__[i__ + 1] = -(work[indumn + i__] * z__[i__]);
417  if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO(
418  d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) {
419  z__[i__ + 1] = 0.;
420  isuppz[2] = i__;
421  goto L260;
422  }
423  *ztz += z__[i__ + 1] * z__[i__ + 1];
424 /* L250: */
425  }
426 L260:
427  ;
428  } else {
429 /* Run slower loop if NaN occurred. */
430  i__1 = *bn - 1;
431  for (i__ = *r__; i__ <= i__1; ++i__) {
432  if (z__[i__] == 0.) {
433  z__[i__ + 1] = -(ld[i__ - 1] / ld[i__]) * z__[i__ - 1];
434  } else {
435  z__[i__ + 1] = -(work[indumn + i__] * z__[i__]);
436  }
437  if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO(
438  d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) {
439  z__[i__ + 1] = 0.;
440  isuppz[2] = i__;
441  goto L280;
442  }
443  *ztz += z__[i__ + 1] * z__[i__ + 1];
444 /* L270: */
445  }
446 L280:
447  ;
448  }
449 
450 /* Compute quantities for convergence test */
451 
452  tmp = 1. / *ztz;
453  *nrminv = template_blas_sqrt(tmp);
454  *resid = absMACRO(*mingma) * *nrminv;
455  *rqcorr = *mingma * tmp;
456 
457 
458  return 0;
459 
460 /* End of DLAR1V */
461 
462 } /* dlar1v_ */
463 
464 #endif
#define absMACRO(x)
Definition: template_blas_common.h:47
logical template_lapack_isnan(Treal *din)
Definition: template_lapack_isnan.h:45
int integer
Definition: template_blas_common.h:40
int template_lapack_lar1v(integer *n, integer *b1, integer *bn, Treal *lambda, Treal *d__, Treal *l, Treal *ld, Treal *lld, Treal *pivmin, Treal *gaptol, Treal *z__, logical *wantnc, integer *negcnt, Treal *ztz, Treal *mingma, integer *r__, integer *isuppz, Treal *nrminv, Treal *resid, Treal *rqcorr, Treal *work)
Definition: template_lapack_lar1v.h:41
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:202
bool logical
Definition: template_blas_common.h:41
#define FALSE_
Definition: template_lapack_common.h:43
Treal template_blas_sqrt(Treal x)