ergo
template_lapack_lag2.h
Go to the documentation of this file.
1 /* Ergo, version 3.7, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2018 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31  * code. The source files in the template_lapack directory are modified
32  * versions of files originally distributed as CLAPACK, see the
33  * Copyright/license notice in the file template_lapack/COPYING.
34  */
35 
36 
37 #ifndef TEMPLATE_LAPACK_LAG2_HEADER
38 #define TEMPLATE_LAPACK_LAG2_HEADER
39 
40 
41 template<class Treal>
42 int template_lapack_lag2(const Treal *a, const integer *lda, const Treal *b,
43  const integer *ldb, const Treal *safmin, Treal *scale1, Treal *
44  scale2, Treal *wr1, Treal *wr2, Treal *wi)
45 {
46 /* -- LAPACK auxiliary routine (version 3.0) --
47  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
48  Courant Institute, Argonne National Lab, and Rice University
49  March 31, 1993
50 
51 
52  Purpose
53  =======
54 
55  DLAG2 computes the eigenvalues of a 2 x 2 generalized eigenvalue
56  problem A - w B, with scaling as necessary to avoid over-/underflow.
57 
58  The scaling factor "s" results in a modified eigenvalue equation
59 
60  s A - w B
61 
62  where s is a non-negative scaling factor chosen so that w, w B,
63  and s A do not overflow and, if possible, do not underflow, either.
64 
65  Arguments
66  =========
67 
68  A (input) DOUBLE PRECISION array, dimension (LDA, 2)
69  On entry, the 2 x 2 matrix A. It is assumed that its 1-norm
70  is less than 1/SAFMIN. Entries less than
71  sqrt(SAFMIN)*norm(A) are subject to being treated as zero.
72 
73  LDA (input) INTEGER
74  The leading dimension of the array A. LDA >= 2.
75 
76  B (input) DOUBLE PRECISION array, dimension (LDB, 2)
77  On entry, the 2 x 2 upper triangular matrix B. It is
78  assumed that the one-norm of B is less than 1/SAFMIN. The
79  diagonals should be at least sqrt(SAFMIN) times the largest
80  element of B (in absolute value); if a diagonal is smaller
81  than that, then +/- sqrt(SAFMIN) will be used instead of
82  that diagonal.
83 
84  LDB (input) INTEGER
85  The leading dimension of the array B. LDB >= 2.
86 
87  SAFMIN (input) DOUBLE PRECISION
88  The smallest positive number s.t. 1/SAFMIN does not
89  overflow. (This should always be DLAMCH('S') -- it is an
90  argument in order to avoid having to call DLAMCH frequently.)
91 
92  SCALE1 (output) DOUBLE PRECISION
93  A scaling factor used to avoid over-/underflow in the
94  eigenvalue equation which defines the first eigenvalue. If
95  the eigenvalues are complex, then the eigenvalues are
96  ( WR1 +/- WI i ) / SCALE1 (which may lie outside the
97  exponent range of the machine), SCALE1=SCALE2, and SCALE1
98  will always be positive. If the eigenvalues are real, then
99  the first (real) eigenvalue is WR1 / SCALE1 , but this may
100  overflow or underflow, and in fact, SCALE1 may be zero or
101  less than the underflow threshhold if the exact eigenvalue
102  is sufficiently large.
103 
104  SCALE2 (output) DOUBLE PRECISION
105  A scaling factor used to avoid over-/underflow in the
106  eigenvalue equation which defines the second eigenvalue. If
107  the eigenvalues are complex, then SCALE2=SCALE1. If the
108  eigenvalues are real, then the second (real) eigenvalue is
109  WR2 / SCALE2 , but this may overflow or underflow, and in
110  fact, SCALE2 may be zero or less than the underflow
111  threshhold if the exact eigenvalue is sufficiently large.
112 
113  WR1 (output) DOUBLE PRECISION
114  If the eigenvalue is real, then WR1 is SCALE1 times the
115  eigenvalue closest to the (2,2) element of A B**(-1). If the
116  eigenvalue is complex, then WR1=WR2 is SCALE1 times the real
117  part of the eigenvalues.
118 
119  WR2 (output) DOUBLE PRECISION
120  If the eigenvalue is real, then WR2 is SCALE2 times the
121  other eigenvalue. If the eigenvalue is complex, then
122  WR1=WR2 is SCALE1 times the real part of the eigenvalues.
123 
124  WI (output) DOUBLE PRECISION
125  If the eigenvalue is real, then WI is zero. If the
126  eigenvalue is complex, then WI is SCALE1 times the imaginary
127  part of the eigenvalues. WI will always be non-negative.
128 
129  =====================================================================
130 
131 
132  Parameter adjustments */
133  /* System generated locals */
134  integer a_dim1, a_offset, b_dim1, b_offset;
135  Treal d__1, d__2, d__3, d__4, d__5, d__6;
136  /* Local variables */
137  Treal diff, bmin, wbig, wabs, wdet, r__, binv11, binv22,
138  discr, anorm, bnorm, bsize, shift, c1, c2, c3, c4, c5, rtmin,
139  rtmax, wsize, s1, s2, a11, a12, a21, a22, b11, b12, b22, ascale,
140  bscale, pp, qq, ss, wscale, safmax, wsmall, as11, as12, as22, sum,
141  abi22;
142 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
143 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
144 
145  a_dim1 = *lda;
146  a_offset = 1 + a_dim1 * 1;
147  a -= a_offset;
148  b_dim1 = *ldb;
149  b_offset = 1 + b_dim1 * 1;
150  b -= b_offset;
151 
152  /* Function Body */
153  rtmin = template_blas_sqrt(*safmin);
154  rtmax = 1. / rtmin;
155  safmax = 1. / *safmin;
156 
157 /* Scale A
158 
159  Computing MAX */
160  d__5 = (d__1 = a_ref(1, 1), absMACRO(d__1)) + (d__2 = a_ref(2, 1), absMACRO(d__2)),
161  d__6 = (d__3 = a_ref(1, 2), absMACRO(d__3)) + (d__4 = a_ref(2, 2), absMACRO(
162  d__4)), d__5 = maxMACRO(d__5,d__6);
163  anorm = maxMACRO(d__5,*safmin);
164  ascale = 1. / anorm;
165  a11 = ascale * a_ref(1, 1);
166  a21 = ascale * a_ref(2, 1);
167  a12 = ascale * a_ref(1, 2);
168  a22 = ascale * a_ref(2, 2);
169 
170 /* Perturb B if necessary to insure non-singularity */
171 
172  b11 = b_ref(1, 1);
173  b12 = b_ref(1, 2);
174  b22 = b_ref(2, 2);
175 /* Computing MAX */
176  d__1 = absMACRO(b11), d__2 = absMACRO(b12), d__1 = maxMACRO(d__1,d__2), d__2 = absMACRO(b22),
177  d__1 = maxMACRO(d__1,d__2);
178  bmin = rtmin * maxMACRO(d__1,rtmin);
179  if (absMACRO(b11) < bmin) {
180  b11 = template_lapack_d_sign(&bmin, &b11);
181  }
182  if (absMACRO(b22) < bmin) {
183  b22 = template_lapack_d_sign(&bmin, &b22);
184  }
185 
186 /* Scale B
187 
188  Computing MAX */
189  d__1 = absMACRO(b11), d__2 = absMACRO(b12) + absMACRO(b22), d__1 = maxMACRO(d__1,d__2);
190  bnorm = maxMACRO(d__1,*safmin);
191 /* Computing MAX */
192  d__1 = absMACRO(b11), d__2 = absMACRO(b22);
193  bsize = maxMACRO(d__1,d__2);
194  bscale = 1. / bsize;
195  b11 *= bscale;
196  b12 *= bscale;
197  b22 *= bscale;
198 
199 /* Compute larger eigenvalue by method described by C. van Loan
200 
201  ( AS is A shifted by -SHIFT*B ) */
202 
203  binv11 = 1. / b11;
204  binv22 = 1. / b22;
205  s1 = a11 * binv11;
206  s2 = a22 * binv22;
207  if (absMACRO(s1) <= absMACRO(s2)) {
208  as12 = a12 - s1 * b12;
209  as22 = a22 - s1 * b22;
210  ss = a21 * (binv11 * binv22);
211  abi22 = as22 * binv22 - ss * b12;
212  pp = abi22 * .5;
213  shift = s1;
214  } else {
215  as12 = a12 - s2 * b12;
216  as11 = a11 - s2 * b11;
217  ss = a21 * (binv11 * binv22);
218  abi22 = -ss * b12;
219  pp = (as11 * binv11 + abi22) * .5;
220  shift = s2;
221  }
222  qq = ss * as12;
223  if ((d__1 = pp * rtmin, absMACRO(d__1)) >= 1.) {
224 /* Computing 2nd power */
225  d__1 = rtmin * pp;
226  discr = d__1 * d__1 + qq * *safmin;
227  r__ = template_blas_sqrt((absMACRO(discr))) * rtmax;
228  } else {
229 /* Computing 2nd power */
230  d__1 = pp;
231  if (d__1 * d__1 + absMACRO(qq) <= *safmin) {
232 /* Computing 2nd power */
233  d__1 = rtmax * pp;
234  discr = d__1 * d__1 + qq * safmax;
235  r__ = template_blas_sqrt((absMACRO(discr))) * rtmin;
236  } else {
237 /* Computing 2nd power */
238  d__1 = pp;
239  discr = d__1 * d__1 + qq;
240  r__ = template_blas_sqrt((absMACRO(discr)));
241  }
242  }
243 
244 /* Note: the test of R in the following IF is to cover the case when
245  DISCR is small and negative and is flushed to zero during
246  the calculation of R. On machines which have a consistent
247  flush-to-zero threshhold and handle numbers above that
248  threshhold correctly, it would not be necessary. */
249 
250  if (discr >= 0. || r__ == 0.) {
251  sum = pp + template_lapack_d_sign(&r__, &pp);
252  diff = pp - template_lapack_d_sign(&r__, &pp);
253  wbig = shift + sum;
254 
255 /* Compute smaller eigenvalue */
256 
257  wsmall = shift + diff;
258 /* Computing MAX */
259  d__1 = absMACRO(wsmall);
260  if (absMACRO(wbig) * .5 > maxMACRO(d__1,*safmin)) {
261  wdet = (a11 * a22 - a12 * a21) * (binv11 * binv22);
262  wsmall = wdet / wbig;
263  }
264 
265 /* Choose (real) eigenvalue closest to 2,2 element of A*B**(-1)
266  for WR1. */
267 
268  if (pp > abi22) {
269  *wr1 = minMACRO(wbig,wsmall);
270  *wr2 = maxMACRO(wbig,wsmall);
271  } else {
272  *wr1 = maxMACRO(wbig,wsmall);
273  *wr2 = minMACRO(wbig,wsmall);
274  }
275  *wi = 0.;
276  } else {
277 
278 /* Complex eigenvalues */
279 
280  *wr1 = shift + pp;
281  *wr2 = *wr1;
282  *wi = r__;
283  }
284 
285 /* Further scaling to avoid underflow and overflow in computing
286  SCALE1 and overflow in computing w*B.
287 
288  This scale factor (WSCALE) is bounded from above using C1 and C2,
289  and from below using C3 and C4.
290  C1 implements the condition s A must never overflow.
291  C2 implements the condition w B must never overflow.
292  C3, with C2,
293  implement the condition that s A - w B must never overflow.
294  C4 implements the condition s should not underflow.
295  C5 implements the condition max(s,|w|) should be at least 2. */
296 
297  c1 = bsize * (*safmin * maxMACRO(1.,ascale));
298  c2 = *safmin * maxMACRO(1.,bnorm);
299  c3 = bsize * *safmin;
300  if (ascale <= 1. && bsize <= 1.) {
301 /* Computing MIN */
302  d__1 = 1., d__2 = ascale / *safmin * bsize;
303  c4 = minMACRO(d__1,d__2);
304  } else {
305  c4 = 1.;
306  }
307  if (ascale <= 1. || bsize <= 1.) {
308 /* Computing MIN */
309  d__1 = 1., d__2 = ascale * bsize;
310  c5 = minMACRO(d__1,d__2);
311  } else {
312  c5 = 1.;
313  }
314 
315 /* Scale first eigenvalue */
316 
317  wabs = absMACRO(*wr1) + absMACRO(*wi);
318 /* Computing MAX
319  Computing MIN */
320  d__3 = c4, d__4 = maxMACRO(wabs,c5) * .5;
321  d__1 = maxMACRO(*safmin,c1), d__2 = (wabs * c2 + c3) * 1.0000100000000001,
322  d__1 = maxMACRO(d__1,d__2), d__2 = minMACRO(d__3,d__4);
323  wsize = maxMACRO(d__1,d__2);
324  if (wsize != 1.) {
325  wscale = 1. / wsize;
326  if (wsize > 1.) {
327  *scale1 = maxMACRO(ascale,bsize) * wscale * minMACRO(ascale,bsize);
328  } else {
329  *scale1 = minMACRO(ascale,bsize) * wscale * maxMACRO(ascale,bsize);
330  }
331  *wr1 *= wscale;
332  if (*wi != 0.) {
333  *wi *= wscale;
334  *wr2 = *wr1;
335  *scale2 = *scale1;
336  }
337  } else {
338  *scale1 = ascale * bsize;
339  *scale2 = *scale1;
340  }
341 
342 /* Scale second eigenvalue (if real) */
343 
344  if (*wi == 0.) {
345 /* Computing MAX
346  Computing MIN
347  Computing MAX */
348  d__5 = absMACRO(*wr2);
349  d__3 = c4, d__4 = maxMACRO(d__5,c5) * .5;
350  d__1 = maxMACRO(*safmin,c1), d__2 = (absMACRO(*wr2) * c2 + c3) *
351  1.0000100000000001, d__1 = maxMACRO(d__1,d__2), d__2 = minMACRO(d__3,
352  d__4);
353  wsize = maxMACRO(d__1,d__2);
354  if (wsize != 1.) {
355  wscale = 1. / wsize;
356  if (wsize > 1.) {
357  *scale2 = maxMACRO(ascale,bsize) * wscale * minMACRO(ascale,bsize);
358  } else {
359  *scale2 = minMACRO(ascale,bsize) * wscale * maxMACRO(ascale,bsize);
360  }
361  *wr2 *= wscale;
362  } else {
363  *scale2 = ascale * bsize;
364  }
365  }
366 
367 /* End of DLAG2 */
368 
369  return 0;
370 } /* dlag2_ */
371 
372 #undef b_ref
373 #undef a_ref
374 
375 
376 #endif
#define absMACRO(x)
Definition: template_blas_common.h:47
int integer
Definition: template_blas_common.h:40
#define b_ref(a_1, a_2)
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
#define minMACRO(a, b)
Definition: template_blas_common.h:46
int template_lapack_lag2(const Treal *a, const integer *lda, const Treal *b, const integer *ldb, const Treal *safmin, Treal *scale1, Treal *scale2, Treal *wr1, Treal *wr2, Treal *wi)
Definition: template_lapack_lag2.h:42
Treal template_lapack_d_sign(const Treal *a, const Treal *b)
Definition: template_lapack_lamch.h:48
#define a_ref(a_1, a_2)
Treal template_blas_sqrt(Treal x)