ergo
template_lapack_laln2.h
Go to the documentation of this file.
1 /* Ergo, version 3.7, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2018 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31  * code. The source files in the template_lapack directory are modified
32  * versions of files originally distributed as CLAPACK, see the
33  * Copyright/license notice in the file template_lapack/COPYING.
34  */
35 
36 
37 #ifndef TEMPLATE_LAPACK_LALN2_HEADER
38 #define TEMPLATE_LAPACK_LALN2_HEADER
39 
40 
41 template<class Treal>
42 int template_lapack_laln2(const logical *ltrans, const integer *na, const integer *nw,
43  const Treal *smin, const Treal *ca, const Treal *a, const integer *lda,
44  const Treal *d1, const Treal *d2, const Treal *b, const integer *ldb,
45  const Treal *wr, const Treal *wi, Treal *x, const integer *ldx,
46  Treal *scale, Treal *xnorm, integer *info)
47 {
48 /* -- LAPACK auxiliary routine (version 3.0) --
49  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
50  Courant Institute, Argonne National Lab, and Rice University
51  October 31, 1992
52 
53 
54  Purpose
55  =======
56 
57  DLALN2 solves a system of the form (ca A - w D ) X = s B
58  or (ca A' - w D) X = s B with possible scaling ("s") and
59  perturbation of A. (A' means A-transpose.)
60 
61  A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA
62  real diagonal matrix, w is a real or complex value, and X and B are
63  NA x 1 matrices -- real if w is real, complex if w is complex. NA
64  may be 1 or 2.
65 
66  If w is complex, X and B are represented as NA x 2 matrices,
67  the first column of each being the real part and the second
68  being the imaginary part.
69 
70  "s" is a scaling factor (.LE. 1), computed by DLALN2, which is
71  so chosen that X can be computed without overflow. X is further
72  scaled if necessary to assure that norm(ca A - w D)*norm(X) is less
73  than overflow.
74 
75  If both singular values of (ca A - w D) are less than SMIN,
76  SMIN*identity will be used instead of (ca A - w D). If only one
77  singular value is less than SMIN, one element of (ca A - w D) will be
78  perturbed enough to make the smallest singular value roughly SMIN.
79  If both singular values are at least SMIN, (ca A - w D) will not be
80  perturbed. In any case, the perturbation will be at most some small
81  multiple of max( SMIN, ulp*norm(ca A - w D) ). The singular values
82  are computed by infinity-norm approximations, and thus will only be
83  correct to a factor of 2 or so.
84 
85  Note: all input quantities are assumed to be smaller than overflow
86  by a reasonable factor. (See BIGNUM.)
87 
88  Arguments
89  ==========
90 
91  LTRANS (input) LOGICAL
92  =.TRUE.: A-transpose will be used.
93  =.FALSE.: A will be used (not transposed.)
94 
95  NA (input) INTEGER
96  The size of the matrix A. It may (only) be 1 or 2.
97 
98  NW (input) INTEGER
99  1 if "w" is real, 2 if "w" is complex. It may only be 1
100  or 2.
101 
102  SMIN (input) DOUBLE PRECISION
103  The desired lower bound on the singular values of A. This
104  should be a safe distance away from underflow or overflow,
105  say, between (underflow/machine precision) and (machine
106  precision * overflow ). (See BIGNUM and ULP.)
107 
108  CA (input) DOUBLE PRECISION
109  The coefficient c, which A is multiplied by.
110 
111  A (input) DOUBLE PRECISION array, dimension (LDA,NA)
112  The NA x NA matrix A.
113 
114  LDA (input) INTEGER
115  The leading dimension of A. It must be at least NA.
116 
117  D1 (input) DOUBLE PRECISION
118  The 1,1 element in the diagonal matrix D.
119 
120  D2 (input) DOUBLE PRECISION
121  The 2,2 element in the diagonal matrix D. Not used if NW=1.
122 
123  B (input) DOUBLE PRECISION array, dimension (LDB,NW)
124  The NA x NW matrix B (right-hand side). If NW=2 ("w" is
125  complex), column 1 contains the real part of B and column 2
126  contains the imaginary part.
127 
128  LDB (input) INTEGER
129  The leading dimension of B. It must be at least NA.
130 
131  WR (input) DOUBLE PRECISION
132  The real part of the scalar "w".
133 
134  WI (input) DOUBLE PRECISION
135  The imaginary part of the scalar "w". Not used if NW=1.
136 
137  X (output) DOUBLE PRECISION array, dimension (LDX,NW)
138  The NA x NW matrix X (unknowns), as computed by DLALN2.
139  If NW=2 ("w" is complex), on exit, column 1 will contain
140  the real part of X and column 2 will contain the imaginary
141  part.
142 
143  LDX (input) INTEGER
144  The leading dimension of X. It must be at least NA.
145 
146  SCALE (output) DOUBLE PRECISION
147  The scale factor that B must be multiplied by to insure
148  that overflow does not occur when computing X. Thus,
149  (ca A - w D) X will be SCALE*B, not B (ignoring
150  perturbations of A.) It will be at most 1.
151 
152  XNORM (output) DOUBLE PRECISION
153  The infinity-norm of X, when X is regarded as an NA x NW
154  real matrix.
155 
156  INFO (output) INTEGER
157  An error flag. It will be set to zero if no error occurs,
158  a negative number if an argument is in error, or a positive
159  number if ca A - w D had to be perturbed.
160  The possible values are:
161  = 0: No error occurred, and (ca A - w D) did not have to be
162  perturbed.
163  = 1: (ca A - w D) had to be perturbed to make its smallest
164  (or only) singular value greater than SMIN.
165  NOTE: In the interests of speed, this routine does not
166  check the inputs for errors.
167 
168  =====================================================================
169 
170  Parameter adjustments */
171  /* Initialized data */
172  logical zswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ };
173  logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ };
174  integer ipivot[16] /* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2,
175  4,3,2,1 };
176  /* System generated locals */
177  integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset;
178  Treal d__1, d__2, d__3, d__4, d__5, d__6;
179  Treal equiv_0[4], equiv_1[4];
180  /* Local variables */
181  Treal bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s;
182  integer j;
183  Treal u22abs;
184  integer icmax;
185  Treal bnorm, cnorm, smini;
186 #define ci (equiv_0)
187 #define cr (equiv_1)
188  Treal bignum, bi1, bi2, br1, br2, smlnum, xi1, xi2, xr1, xr2,
189  ci21, ci22, cr21, cr22, li21, csi, ui11, lr21, ui12, ui22;
190 #define civ (equiv_0)
191  Treal csr, ur11, ur12, ur22;
192 #define crv (equiv_1)
193 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
194 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
195 #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1]
196 #define ci_ref(a_1,a_2) ci[(a_2)*2 + a_1 - 3]
197 #define cr_ref(a_1,a_2) cr[(a_2)*2 + a_1 - 3]
198 #define ipivot_ref(a_1,a_2) ipivot[(a_2)*4 + a_1 - 5]
199 
200  a_dim1 = *lda;
201  a_offset = 1 + a_dim1 * 1;
202  a -= a_offset;
203  b_dim1 = *ldb;
204  b_offset = 1 + b_dim1 * 1;
205  b -= b_offset;
206  x_dim1 = *ldx;
207  x_offset = 1 + x_dim1 * 1;
208  x -= x_offset;
209 
210  /* Function Body
211 
212  Compute BIGNUM */
213 
214  smlnum = 2. * template_lapack_lamch("Safe minimum", (Treal)0);
215  bignum = 1. / smlnum;
216  smini = maxMACRO(*smin,smlnum);
217 
218 /* Don't check for input errors */
219 
220  *info = 0;
221 
222 /* Standard Initializations */
223 
224  *scale = 1.;
225 
226  if (*na == 1) {
227 
228 /* 1 x 1 (i.e., scalar) system C X = B */
229 
230  if (*nw == 1) {
231 
232 /* Real 1x1 system.
233 
234  C = ca A - w D */
235 
236  csr = *ca * a_ref(1, 1) - *wr * *d1;
237  cnorm = absMACRO(csr);
238 
239 /* If | C | < SMINI, use C = SMINI */
240 
241  if (cnorm < smini) {
242  csr = smini;
243  cnorm = smini;
244  *info = 1;
245  }
246 
247 /* Check scaling for X = B / C */
248 
249  bnorm = (d__1 = b_ref(1, 1), absMACRO(d__1));
250  if (cnorm < 1. && bnorm > 1.) {
251  if (bnorm > bignum * cnorm) {
252  *scale = 1. / bnorm;
253  }
254  }
255 
256 /* Compute X */
257 
258  x_ref(1, 1) = b_ref(1, 1) * *scale / csr;
259  *xnorm = (d__1 = x_ref(1, 1), absMACRO(d__1));
260  } else {
261 
262 /* Complex 1x1 system (w is complex)
263 
264  C = ca A - w D */
265 
266  csr = *ca * a_ref(1, 1) - *wr * *d1;
267  csi = -(*wi) * *d1;
268  cnorm = absMACRO(csr) + absMACRO(csi);
269 
270 /* If | C | < SMINI, use C = SMINI */
271 
272  if (cnorm < smini) {
273  csr = smini;
274  csi = 0.;
275  cnorm = smini;
276  *info = 1;
277  }
278 
279 /* Check scaling for X = B / C */
280 
281  bnorm = (d__1 = b_ref(1, 1), absMACRO(d__1)) + (d__2 = b_ref(1, 2),
282  absMACRO(d__2));
283  if (cnorm < 1. && bnorm > 1.) {
284  if (bnorm > bignum * cnorm) {
285  *scale = 1. / bnorm;
286  }
287  }
288 
289 /* Compute X */
290 
291  d__1 = *scale * b_ref(1, 1);
292  d__2 = *scale * b_ref(1, 2);
293  template_lapack_ladiv(&d__1, &d__2, &csr, &csi, &x_ref(1, 1), &x_ref(1, 2));
294  *xnorm = (d__1 = x_ref(1, 1), absMACRO(d__1)) + (d__2 = x_ref(1, 2),
295  absMACRO(d__2));
296  }
297 
298  } else {
299 
300 /* 2x2 System
301 
302  Compute the real part of C = ca A - w D (or ca A' - w D ) */
303 
304  cr_ref(1, 1) = *ca * a_ref(1, 1) - *wr * *d1;
305  cr_ref(2, 2) = *ca * a_ref(2, 2) - *wr * *d2;
306  if (*ltrans) {
307  cr_ref(1, 2) = *ca * a_ref(2, 1);
308  cr_ref(2, 1) = *ca * a_ref(1, 2);
309  } else {
310  cr_ref(2, 1) = *ca * a_ref(2, 1);
311  cr_ref(1, 2) = *ca * a_ref(1, 2);
312  }
313 
314  if (*nw == 1) {
315 
316 /* Real 2x2 system (w is real)
317 
318  Find the largest element in C */
319 
320  cmax = 0.;
321  icmax = 0;
322 
323  for (j = 1; j <= 4; ++j) {
324  if ((d__1 = crv[j - 1], absMACRO(d__1)) > cmax) {
325  cmax = (d__1 = crv[j - 1], absMACRO(d__1));
326  icmax = j;
327  }
328 /* L10: */
329  }
330 
331 /* If norm(C) < SMINI, use SMINI*identity. */
332 
333  if (cmax < smini) {
334 /* Computing MAX */
335  d__3 = (d__1 = b_ref(1, 1), absMACRO(d__1)), d__4 = (d__2 = b_ref(
336  2, 1), absMACRO(d__2));
337  bnorm = maxMACRO(d__3,d__4);
338  if (smini < 1. && bnorm > 1.) {
339  if (bnorm > bignum * smini) {
340  *scale = 1. / bnorm;
341  }
342  }
343  temp = *scale / smini;
344  x_ref(1, 1) = temp * b_ref(1, 1);
345  x_ref(2, 1) = temp * b_ref(2, 1);
346  *xnorm = temp * bnorm;
347  *info = 1;
348  return 0;
349  }
350 
351 /* Gaussian elimination with complete pivoting. */
352 
353  ur11 = crv[icmax - 1];
354  cr21 = crv[ipivot_ref(2, icmax) - 1];
355  ur12 = crv[ipivot_ref(3, icmax) - 1];
356  cr22 = crv[ipivot_ref(4, icmax) - 1];
357  ur11r = 1. / ur11;
358  lr21 = ur11r * cr21;
359  ur22 = cr22 - ur12 * lr21;
360 
361 /* If smaller pivot < SMINI, use SMINI */
362 
363  if (absMACRO(ur22) < smini) {
364  ur22 = smini;
365  *info = 1;
366  }
367  if (rswap[icmax - 1]) {
368  br1 = b_ref(2, 1);
369  br2 = b_ref(1, 1);
370  } else {
371  br1 = b_ref(1, 1);
372  br2 = b_ref(2, 1);
373  }
374  br2 -= lr21 * br1;
375 /* Computing MAX */
376  d__2 = (d__1 = br1 * (ur22 * ur11r), absMACRO(d__1)), d__3 = absMACRO(br2);
377  bbnd = maxMACRO(d__2,d__3);
378  if (bbnd > 1. && absMACRO(ur22) < 1.) {
379  if (bbnd >= bignum * absMACRO(ur22)) {
380  *scale = 1. / bbnd;
381  }
382  }
383 
384  xr2 = br2 * *scale / ur22;
385  xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12);
386  if (zswap[icmax - 1]) {
387  x_ref(1, 1) = xr2;
388  x_ref(2, 1) = xr1;
389  } else {
390  x_ref(1, 1) = xr1;
391  x_ref(2, 1) = xr2;
392  }
393 /* Computing MAX */
394  d__1 = absMACRO(xr1), d__2 = absMACRO(xr2);
395  *xnorm = maxMACRO(d__1,d__2);
396 
397 /* Further scaling if norm(A) norm(X) > overflow */
398 
399  if (*xnorm > 1. && cmax > 1.) {
400  if (*xnorm > bignum / cmax) {
401  temp = cmax / bignum;
402  x_ref(1, 1) = temp * x_ref(1, 1);
403  x_ref(2, 1) = temp * x_ref(2, 1);
404  *xnorm = temp * *xnorm;
405  *scale = temp * *scale;
406  }
407  }
408  } else {
409 
410 /* Complex 2x2 system (w is complex)
411 
412  Find the largest element in C */
413 
414  ci_ref(1, 1) = -(*wi) * *d1;
415  ci_ref(2, 1) = 0.;
416  ci_ref(1, 2) = 0.;
417  ci_ref(2, 2) = -(*wi) * *d2;
418  cmax = 0.;
419  icmax = 0;
420 
421  for (j = 1; j <= 4; ++j) {
422  if ((d__1 = crv[j - 1], absMACRO(d__1)) + (d__2 = civ[j - 1], absMACRO(
423  d__2)) > cmax) {
424  cmax = (d__1 = crv[j - 1], absMACRO(d__1)) + (d__2 = civ[j - 1]
425  , absMACRO(d__2));
426  icmax = j;
427  }
428 /* L20: */
429  }
430 
431 /* If norm(C) < SMINI, use SMINI*identity. */
432 
433  if (cmax < smini) {
434 /* Computing MAX */
435  d__5 = (d__1 = b_ref(1, 1), absMACRO(d__1)) + (d__2 = b_ref(1, 2),
436  absMACRO(d__2)), d__6 = (d__3 = b_ref(2, 1), absMACRO(d__3)) + (
437  d__4 = b_ref(2, 2), absMACRO(d__4));
438  bnorm = maxMACRO(d__5,d__6);
439  if (smini < 1. && bnorm > 1.) {
440  if (bnorm > bignum * smini) {
441  *scale = 1. / bnorm;
442  }
443  }
444  temp = *scale / smini;
445  x_ref(1, 1) = temp * b_ref(1, 1);
446  x_ref(2, 1) = temp * b_ref(2, 1);
447  x_ref(1, 2) = temp * b_ref(1, 2);
448  x_ref(2, 2) = temp * b_ref(2, 2);
449  *xnorm = temp * bnorm;
450  *info = 1;
451  return 0;
452  }
453 
454 /* Gaussian elimination with complete pivoting. */
455 
456  ur11 = crv[icmax - 1];
457  ui11 = civ[icmax - 1];
458  cr21 = crv[ipivot_ref(2, icmax) - 1];
459  ci21 = civ[ipivot_ref(2, icmax) - 1];
460  ur12 = crv[ipivot_ref(3, icmax) - 1];
461  ui12 = civ[ipivot_ref(3, icmax) - 1];
462  cr22 = crv[ipivot_ref(4, icmax) - 1];
463  ci22 = civ[ipivot_ref(4, icmax) - 1];
464  if (icmax == 1 || icmax == 4) {
465 
466 /* Code when off-diagonals of pivoted C are real */
467 
468  if (absMACRO(ur11) > absMACRO(ui11)) {
469  temp = ui11 / ur11;
470 /* Computing 2nd power */
471  d__1 = temp;
472  ur11r = 1. / (ur11 * (d__1 * d__1 + 1.));
473  ui11r = -temp * ur11r;
474  } else {
475  temp = ur11 / ui11;
476 /* Computing 2nd power */
477  d__1 = temp;
478  ui11r = -1. / (ui11 * (d__1 * d__1 + 1.));
479  ur11r = -temp * ui11r;
480  }
481  lr21 = cr21 * ur11r;
482  li21 = cr21 * ui11r;
483  ur12s = ur12 * ur11r;
484  ui12s = ur12 * ui11r;
485  ur22 = cr22 - ur12 * lr21;
486  ui22 = ci22 - ur12 * li21;
487  } else {
488 
489 /* Code when diagonals of pivoted C are real */
490 
491  ur11r = 1. / ur11;
492  ui11r = 0.;
493  lr21 = cr21 * ur11r;
494  li21 = ci21 * ur11r;
495  ur12s = ur12 * ur11r;
496  ui12s = ui12 * ur11r;
497  ur22 = cr22 - ur12 * lr21 + ui12 * li21;
498  ui22 = -ur12 * li21 - ui12 * lr21;
499  }
500  u22abs = absMACRO(ur22) + absMACRO(ui22);
501 
502 /* If smaller pivot < SMINI, use SMINI */
503 
504  if (u22abs < smini) {
505  ur22 = smini;
506  ui22 = 0.;
507  *info = 1;
508  }
509  if (rswap[icmax - 1]) {
510  br2 = b_ref(1, 1);
511  br1 = b_ref(2, 1);
512  bi2 = b_ref(1, 2);
513  bi1 = b_ref(2, 2);
514  } else {
515  br1 = b_ref(1, 1);
516  br2 = b_ref(2, 1);
517  bi1 = b_ref(1, 2);
518  bi2 = b_ref(2, 2);
519  }
520  br2 = br2 - lr21 * br1 + li21 * bi1;
521  bi2 = bi2 - li21 * br1 - lr21 * bi1;
522 /* Computing MAX */
523  d__1 = (absMACRO(br1) + absMACRO(bi1)) * (u22abs * (absMACRO(ur11r) + absMACRO(ui11r))
524  ), d__2 = absMACRO(br2) + absMACRO(bi2);
525  bbnd = maxMACRO(d__1,d__2);
526  if (bbnd > 1. && u22abs < 1.) {
527  if (bbnd >= bignum * u22abs) {
528  *scale = 1. / bbnd;
529  br1 = *scale * br1;
530  bi1 = *scale * bi1;
531  br2 = *scale * br2;
532  bi2 = *scale * bi2;
533  }
534  }
535 
536  template_lapack_ladiv(&br2, &bi2, &ur22, &ui22, &xr2, &xi2);
537  xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2;
538  xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2;
539  if (zswap[icmax - 1]) {
540  x_ref(1, 1) = xr2;
541  x_ref(2, 1) = xr1;
542  x_ref(1, 2) = xi2;
543  x_ref(2, 2) = xi1;
544  } else {
545  x_ref(1, 1) = xr1;
546  x_ref(2, 1) = xr2;
547  x_ref(1, 2) = xi1;
548  x_ref(2, 2) = xi2;
549  }
550 /* Computing MAX */
551  d__1 = absMACRO(xr1) + absMACRO(xi1), d__2 = absMACRO(xr2) + absMACRO(xi2);
552  *xnorm = maxMACRO(d__1,d__2);
553 
554 /* Further scaling if norm(A) norm(X) > overflow */
555 
556  if (*xnorm > 1. && cmax > 1.) {
557  if (*xnorm > bignum / cmax) {
558  temp = cmax / bignum;
559  x_ref(1, 1) = temp * x_ref(1, 1);
560  x_ref(2, 1) = temp * x_ref(2, 1);
561  x_ref(1, 2) = temp * x_ref(1, 2);
562  x_ref(2, 2) = temp * x_ref(2, 2);
563  *xnorm = temp * *xnorm;
564  *scale = temp * *scale;
565  }
566  }
567  }
568  }
569 
570  return 0;
571 
572 /* End of DLALN2 */
573 
574 } /* dlaln2_ */
575 
576 #undef ipivot_ref
577 #undef cr_ref
578 #undef ci_ref
579 #undef x_ref
580 #undef b_ref
581 #undef a_ref
582 #undef crv
583 #undef civ
584 #undef cr
585 #undef ci
586 
587 
588 #endif
#define absMACRO(x)
Definition: template_blas_common.h:47
#define crv
int integer
Definition: template_blas_common.h:40
int template_lapack_ladiv(const Treal *a, const Treal *b, const Treal *c__, const Treal *d__, Treal *p, Treal *q)
Definition: template_lapack_ladiv.h:42
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
#define a_ref(a_1, a_2)
#define ipivot_ref(a_1, a_2)
#define b_ref(a_1, a_2)
#define x_ref(a_1, a_2)
#define cr_ref(a_1, a_2)
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:202
bool logical
Definition: template_blas_common.h:41
#define TRUE_
Definition: template_lapack_common.h:42
int template_lapack_laln2(const logical *ltrans, const integer *na, const integer *nw, const Treal *smin, const Treal *ca, const Treal *a, const integer *lda, const Treal *d1, const Treal *d2, const Treal *b, const integer *ldb, const Treal *wr, const Treal *wi, Treal *x, const integer *ldx, Treal *scale, Treal *xnorm, integer *info)
Definition: template_lapack_laln2.h:42
#define FALSE_
Definition: template_lapack_common.h:43
#define civ
#define ci_ref(a_1, a_2)