ergo
template_lapack_ggbal.h
Go to the documentation of this file.
1 /* Ergo, version 3.7, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2018 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31  * code. The source files in the template_lapack directory are modified
32  * versions of files originally distributed as CLAPACK, see the
33  * Copyright/license notice in the file template_lapack/COPYING.
34  */
35 
36 
37 #ifndef TEMPLATE_LAPACK_GGBAL_HEADER
38 #define TEMPLATE_LAPACK_GGBAL_HEADER
39 
40 
41 template<class Treal>
42 int template_lapack_ggbal(const char *job, const integer *n, Treal *a, const integer *
43  lda, Treal *b, const integer *ldb, integer *ilo, integer *ihi,
44  Treal *lscale, Treal *rscale, Treal *work, integer *
45  info)
46 {
47 /* -- LAPACK routine (version 3.0) --
48  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
49  Courant Institute, Argonne National Lab, and Rice University
50  September 30, 1994
51 
52 
53  Purpose
54  =======
55 
56  DGGBAL balances a pair of general real matrices (A,B). This
57  involves, first, permuting A and B by similarity transformations to
58  isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N
59  elements on the diagonal; and second, applying a diagonal similarity
60  transformation to rows and columns ILO to IHI to make the rows
61  and columns as close in norm as possible. Both steps are optional.
62 
63  Balancing may reduce the 1-norm of the matrices, and improve the
64  accuracy of the computed eigenvalues and/or eigenvectors in the
65  generalized eigenvalue problem A*x = lambda*B*x.
66 
67  Arguments
68  =========
69 
70  JOB (input) CHARACTER*1
71  Specifies the operations to be performed on A and B:
72  = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0
73  and RSCALE(I) = 1.0 for i = 1,...,N.
74  = 'P': permute only;
75  = 'S': scale only;
76  = 'B': both permute and scale.
77 
78  N (input) INTEGER
79  The order of the matrices A and B. N >= 0.
80 
81  A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
82  On entry, the input matrix A.
83  On exit, A is overwritten by the balanced matrix.
84  If JOB = 'N', A is not referenced.
85 
86  LDA (input) INTEGER
87  The leading dimension of the array A. LDA >= max(1,N).
88 
89  B (input/output) DOUBLE PRECISION array, dimension (LDB,N)
90  On entry, the input matrix B.
91  On exit, B is overwritten by the balanced matrix.
92  If JOB = 'N', B is not referenced.
93 
94  LDB (input) INTEGER
95  The leading dimension of the array B. LDB >= max(1,N).
96 
97  ILO (output) INTEGER
98  IHI (output) INTEGER
99  ILO and IHI are set to integers such that on exit
100  A(i,j) = 0 and B(i,j) = 0 if i > j and
101  j = 1,...,ILO-1 or i = IHI+1,...,N.
102  If JOB = 'N' or 'S', ILO = 1 and IHI = N.
103 
104  LSCALE (output) DOUBLE PRECISION array, dimension (N)
105  Details of the permutations and scaling factors applied
106  to the left side of A and B. If P(j) is the index of the
107  row interchanged with row j, and D(j)
108  is the scaling factor applied to row j, then
109  LSCALE(j) = P(j) for J = 1,...,ILO-1
110  = D(j) for J = ILO,...,IHI
111  = P(j) for J = IHI+1,...,N.
112  The order in which the interchanges are made is N to IHI+1,
113  then 1 to ILO-1.
114 
115  RSCALE (output) DOUBLE PRECISION array, dimension (N)
116  Details of the permutations and scaling factors applied
117  to the right side of A and B. If P(j) is the index of the
118  column interchanged with column j, and D(j)
119  is the scaling factor applied to column j, then
120  LSCALE(j) = P(j) for J = 1,...,ILO-1
121  = D(j) for J = ILO,...,IHI
122  = P(j) for J = IHI+1,...,N.
123  The order in which the interchanges are made is N to IHI+1,
124  then 1 to ILO-1.
125 
126  WORK (workspace) DOUBLE PRECISION array, dimension (6*N)
127 
128  INFO (output) INTEGER
129  = 0: successful exit
130  < 0: if INFO = -i, the i-th argument had an illegal value.
131 
132  Further Details
133  ===============
134 
135  See R.C. WARD, Balancing the generalized eigenvalue problem,
136  SIAM J. Sci. Stat. Comp. 2 (1981), 141-152.
137 
138  =====================================================================
139 
140 
141  Test the input parameters
142 
143  Parameter adjustments */
144  /* Table of constant values */
145  integer c__1 = 1;
146  Treal c_b34 = 10.;
147  Treal c_b70 = .5;
148 
149  /* System generated locals */
150  integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
151  Treal d__1, d__2, d__3;
152  /* Local variables */
153  integer lcab;
154  Treal beta, coef;
155  integer irab, lrab;
156  Treal basl, cmax;
157  Treal coef2, coef5;
158  integer i__, j, k, l, m;
159  Treal gamma, t, alpha;
160  Treal sfmin, sfmax;
161  integer iflow;
162  integer kount, jc;
163  Treal ta, tb, tc;
164  integer ir, it;
165  Treal ew;
166  integer nr;
167  Treal pgamma;
168  integer lsfmin, lsfmax, ip1, jp1, lm1;
169  Treal cab, rab, ewc, cor, sum;
170  integer nrp2, icab;
171 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
172 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
173 
174 
175  a_dim1 = *lda;
176  a_offset = 1 + a_dim1 * 1;
177  a -= a_offset;
178  b_dim1 = *ldb;
179  b_offset = 1 + b_dim1 * 1;
180  b -= b_offset;
181  --lscale;
182  --rscale;
183  --work;
184 
185  /* Initialization added by Elias to get rid of compiler warnings. */
186  pgamma = 0;
187  /* Function Body */
188  *info = 0;
189  if (! template_blas_lsame(job, "N") && ! template_blas_lsame(job, "P") && ! template_blas_lsame(job, "S")
190  && ! template_blas_lsame(job, "B")) {
191  *info = -1;
192  } else if (*n < 0) {
193  *info = -2;
194  } else if (*lda < maxMACRO(1,*n)) {
195  *info = -4;
196  } else if (*ldb < maxMACRO(1,*n)) {
197  *info = -5;
198  }
199  if (*info != 0) {
200  i__1 = -(*info);
201  template_blas_erbla("GGBAL ", &i__1);
202  return 0;
203  }
204 
205  k = 1;
206  l = *n;
207 
208 /* Quick return if possible */
209 
210  if (*n == 0) {
211  return 0;
212  }
213 
214  if (template_blas_lsame(job, "N")) {
215  *ilo = 1;
216  *ihi = *n;
217  i__1 = *n;
218  for (i__ = 1; i__ <= i__1; ++i__) {
219  lscale[i__] = 1.;
220  rscale[i__] = 1.;
221 /* L10: */
222  }
223  return 0;
224  }
225 
226  if (k == l) {
227  *ilo = 1;
228  *ihi = 1;
229  lscale[1] = 1.;
230  rscale[1] = 1.;
231  return 0;
232  }
233 
234  if (template_blas_lsame(job, "S")) {
235  goto L190;
236  }
237 
238  goto L30;
239 
240 /* Permute the matrices A and B to isolate the eigenvalues.
241 
242  Find row with one nonzero in columns 1 through L */
243 
244 L20:
245  l = lm1;
246  if (l != 1) {
247  goto L30;
248  }
249 
250  rscale[1] = 1.;
251  lscale[1] = 1.;
252  goto L190;
253 
254 L30:
255  lm1 = l - 1;
256  for (i__ = l; i__ >= 1; --i__) {
257  i__1 = lm1;
258  for (j = 1; j <= i__1; ++j) {
259  jp1 = j + 1;
260  if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
261  goto L50;
262  }
263 /* L40: */
264  }
265  j = l;
266  goto L70;
267 
268 L50:
269  i__1 = l;
270  for (j = jp1; j <= i__1; ++j) {
271  if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
272  goto L80;
273  }
274 /* L60: */
275  }
276  j = jp1 - 1;
277 
278 L70:
279  m = l;
280  iflow = 1;
281  goto L160;
282 L80:
283  ;
284  }
285  goto L100;
286 
287 /* Find column with one nonzero in rows K through N */
288 
289 L90:
290  ++k;
291 
292 L100:
293  i__1 = l;
294  for (j = k; j <= i__1; ++j) {
295  i__2 = lm1;
296  for (i__ = k; i__ <= i__2; ++i__) {
297  ip1 = i__ + 1;
298  if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
299  goto L120;
300  }
301 /* L110: */
302  }
303  i__ = l;
304  goto L140;
305 L120:
306  i__2 = l;
307  for (i__ = ip1; i__ <= i__2; ++i__) {
308  if (a_ref(i__, j) != 0. || b_ref(i__, j) != 0.) {
309  goto L150;
310  }
311 /* L130: */
312  }
313  i__ = ip1 - 1;
314 L140:
315  m = k;
316  iflow = 2;
317  goto L160;
318 L150:
319  ;
320  }
321  goto L190;
322 
323 /* Permute rows M and I */
324 
325 L160:
326  lscale[m] = (Treal) i__;
327  if (i__ == m) {
328  goto L170;
329  }
330  i__1 = *n - k + 1;
331  template_blas_swap(&i__1, &a_ref(i__, k), lda, &a_ref(m, k), lda);
332  i__1 = *n - k + 1;
333  template_blas_swap(&i__1, &b_ref(i__, k), ldb, &b_ref(m, k), ldb);
334 
335 /* Permute columns M and J */
336 
337 L170:
338  rscale[m] = (Treal) j;
339  if (j == m) {
340  goto L180;
341  }
342  template_blas_swap(&l, &a_ref(1, j), &c__1, &a_ref(1, m), &c__1);
343  template_blas_swap(&l, &b_ref(1, j), &c__1, &b_ref(1, m), &c__1);
344 
345 L180:
346  switch (iflow) {
347  case 1: goto L20;
348  case 2: goto L90;
349  }
350 
351 L190:
352  *ilo = k;
353  *ihi = l;
354 
355  if (*ilo == *ihi) {
356  return 0;
357  }
358 
359  if (template_blas_lsame(job, "P")) {
360  return 0;
361  }
362 
363 /* Balance the submatrix in rows ILO to IHI. */
364 
365  nr = *ihi - *ilo + 1;
366  i__1 = *ihi;
367  for (i__ = *ilo; i__ <= i__1; ++i__) {
368  rscale[i__] = 0.;
369  lscale[i__] = 0.;
370 
371  work[i__] = 0.;
372  work[i__ + *n] = 0.;
373  work[i__ + (*n << 1)] = 0.;
374  work[i__ + *n * 3] = 0.;
375  work[i__ + (*n << 2)] = 0.;
376  work[i__ + *n * 5] = 0.;
377 /* L200: */
378  }
379 
380 /* Compute right side vector in resulting linear equations */
381 
382  basl = template_blas_lg10(&c_b34);
383  i__1 = *ihi;
384  for (i__ = *ilo; i__ <= i__1; ++i__) {
385  i__2 = *ihi;
386  for (j = *ilo; j <= i__2; ++j) {
387  tb = b_ref(i__, j);
388  ta = a_ref(i__, j);
389  if (ta == 0.) {
390  goto L210;
391  }
392  d__1 = absMACRO(ta);
393  ta = template_blas_lg10(&d__1) / basl;
394 L210:
395  if (tb == 0.) {
396  goto L220;
397  }
398  d__1 = absMACRO(tb);
399  tb = template_blas_lg10(&d__1) / basl;
400 L220:
401  work[i__ + (*n << 2)] = work[i__ + (*n << 2)] - ta - tb;
402  work[j + *n * 5] = work[j + *n * 5] - ta - tb;
403 /* L230: */
404  }
405 /* L240: */
406  }
407 
408  coef = 1. / (Treal) (nr << 1);
409  coef2 = coef * coef;
410  coef5 = coef2 * .5;
411  nrp2 = nr + 2;
412  beta = 0.;
413  it = 1;
414 
415 /* Start generalized conjugate gradient iteration */
416 
417 L250:
418 
419  gamma = template_blas_dot(&nr, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + (*n << 2)]
420  , &c__1) + template_blas_dot(&nr, &work[*ilo + *n * 5], &c__1, &work[*ilo + *
421  n * 5], &c__1);
422 
423  ew = 0.;
424  ewc = 0.;
425  i__1 = *ihi;
426  for (i__ = *ilo; i__ <= i__1; ++i__) {
427  ew += work[i__ + (*n << 2)];
428  ewc += work[i__ + *n * 5];
429 /* L260: */
430  }
431 
432 /* Computing 2nd power */
433  d__1 = ew;
434 /* Computing 2nd power */
435  d__2 = ewc;
436 /* Computing 2nd power */
437  d__3 = ew - ewc;
438  gamma = coef * gamma - coef2 * (d__1 * d__1 + d__2 * d__2) - coef5 * (
439  d__3 * d__3);
440  if (gamma == 0.) {
441  goto L350;
442  }
443  if (it != 1) {
444  beta = gamma / pgamma;
445  }
446  t = coef5 * (ewc - ew * 3.);
447  tc = coef5 * (ew - ewc * 3.);
448 
449  template_blas_scal(&nr, &beta, &work[*ilo], &c__1);
450  template_blas_scal(&nr, &beta, &work[*ilo + *n], &c__1);
451 
452  template_blas_axpy(&nr, &coef, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + *n], &
453  c__1);
454  template_blas_axpy(&nr, &coef, &work[*ilo + *n * 5], &c__1, &work[*ilo], &c__1);
455 
456  i__1 = *ihi;
457  for (i__ = *ilo; i__ <= i__1; ++i__) {
458  work[i__] += tc;
459  work[i__ + *n] += t;
460 /* L270: */
461  }
462 
463 /* Apply matrix to vector */
464 
465  i__1 = *ihi;
466  for (i__ = *ilo; i__ <= i__1; ++i__) {
467  kount = 0;
468  sum = 0.;
469  i__2 = *ihi;
470  for (j = *ilo; j <= i__2; ++j) {
471  if (a_ref(i__, j) == 0.) {
472  goto L280;
473  }
474  ++kount;
475  sum += work[j];
476 L280:
477  if (b_ref(i__, j) == 0.) {
478  goto L290;
479  }
480  ++kount;
481  sum += work[j];
482 L290:
483  ;
484  }
485  work[i__ + (*n << 1)] = (Treal) kount * work[i__ + *n] + sum;
486 /* L300: */
487  }
488 
489  i__1 = *ihi;
490  for (j = *ilo; j <= i__1; ++j) {
491  kount = 0;
492  sum = 0.;
493  i__2 = *ihi;
494  for (i__ = *ilo; i__ <= i__2; ++i__) {
495  if (a_ref(i__, j) == 0.) {
496  goto L310;
497  }
498  ++kount;
499  sum += work[i__ + *n];
500 L310:
501  if (b_ref(i__, j) == 0.) {
502  goto L320;
503  }
504  ++kount;
505  sum += work[i__ + *n];
506 L320:
507  ;
508  }
509  work[j + *n * 3] = (Treal) kount * work[j] + sum;
510 /* L330: */
511  }
512 
513  sum = template_blas_dot(&nr, &work[*ilo + *n], &c__1, &work[*ilo + (*n << 1)], &c__1)
514  + template_blas_dot(&nr, &work[*ilo], &c__1, &work[*ilo + *n * 3], &c__1);
515  alpha = gamma / sum;
516 
517 /* Determine correction to current iteration */
518 
519  cmax = 0.;
520  i__1 = *ihi;
521  for (i__ = *ilo; i__ <= i__1; ++i__) {
522  cor = alpha * work[i__ + *n];
523  if (absMACRO(cor) > cmax) {
524  cmax = absMACRO(cor);
525  }
526  lscale[i__] += cor;
527  cor = alpha * work[i__];
528  if (absMACRO(cor) > cmax) {
529  cmax = absMACRO(cor);
530  }
531  rscale[i__] += cor;
532 /* L340: */
533  }
534  if (cmax < .5) {
535  goto L350;
536  }
537 
538  d__1 = -alpha;
539  template_blas_axpy(&nr, &d__1, &work[*ilo + (*n << 1)], &c__1, &work[*ilo + (*n << 2)]
540  , &c__1);
541  d__1 = -alpha;
542  template_blas_axpy(&nr, &d__1, &work[*ilo + *n * 3], &c__1, &work[*ilo + *n * 5], &
543  c__1);
544 
545  pgamma = gamma;
546  ++it;
547  if (it <= nrp2) {
548  goto L250;
549  }
550 
551 /* End generalized conjugate gradient iteration */
552 
553 L350:
554  sfmin = template_lapack_lamch("S", (Treal)0);
555  sfmax = 1. / sfmin;
556  lsfmin = (integer) (template_blas_lg10(&sfmin) / basl + 1.);
557  lsfmax = (integer) (template_blas_lg10(&sfmax) / basl);
558  i__1 = *ihi;
559  for (i__ = *ilo; i__ <= i__1; ++i__) {
560  i__2 = *n - *ilo + 1;
561  irab = template_blas_idamax(&i__2, &a_ref(i__, *ilo), lda);
562  rab = (d__1 = a_ref(i__, irab + *ilo - 1), absMACRO(d__1));
563  i__2 = *n - *ilo + 1;
564  irab = template_blas_idamax(&i__2, &b_ref(i__, *ilo), lda);
565 /* Computing MAX */
566  d__2 = rab, d__3 = (d__1 = b_ref(i__, irab + *ilo - 1), absMACRO(d__1));
567  rab = maxMACRO(d__2,d__3);
568  d__1 = rab + sfmin;
569  lrab = (integer) (template_blas_lg10(&d__1) / basl + 1.);
570  ir = (integer) (lscale[i__] + template_lapack_d_sign(&c_b70, &lscale[i__]));
571 /* Computing MIN */
572  i__2 = maxMACRO(ir,lsfmin), i__2 = minMACRO(i__2,lsfmax), i__3 = lsfmax - lrab;
573  ir = minMACRO(i__2,i__3);
574  lscale[i__] = template_lapack_pow_di(&c_b34, &ir);
575  icab = template_blas_idamax(ihi, &a_ref(1, i__), &c__1);
576  cab = (d__1 = a_ref(icab, i__), absMACRO(d__1));
577  icab = template_blas_idamax(ihi, &b_ref(1, i__), &c__1);
578 /* Computing MAX */
579  d__2 = cab, d__3 = (d__1 = b_ref(icab, i__), absMACRO(d__1));
580  cab = maxMACRO(d__2,d__3);
581  d__1 = cab + sfmin;
582  lcab = (integer) (template_blas_lg10(&d__1) / basl + 1.);
583  jc = (integer) (rscale[i__] + template_lapack_d_sign(&c_b70, &rscale[i__]));
584 /* Computing MIN */
585  i__2 = maxMACRO(jc,lsfmin), i__2 = minMACRO(i__2,lsfmax), i__3 = lsfmax - lcab;
586  jc = minMACRO(i__2,i__3);
587  rscale[i__] = template_lapack_pow_di(&c_b34, &jc);
588 /* L360: */
589  }
590 
591 /* Row scaling of matrices A and B */
592 
593  i__1 = *ihi;
594  for (i__ = *ilo; i__ <= i__1; ++i__) {
595  i__2 = *n - *ilo + 1;
596  template_blas_scal(&i__2, &lscale[i__], &a_ref(i__, *ilo), lda);
597  i__2 = *n - *ilo + 1;
598  template_blas_scal(&i__2, &lscale[i__], &b_ref(i__, *ilo), ldb);
599 /* L370: */
600  }
601 
602 /* Column scaling of matrices A and B */
603 
604  i__1 = *ihi;
605  for (j = *ilo; j <= i__1; ++j) {
606  template_blas_scal(ihi, &rscale[j], &a_ref(1, j), &c__1);
607  template_blas_scal(ihi, &rscale[j], &b_ref(1, j), &c__1);
608 /* L380: */
609  }
610 
611  return 0;
612 
613 /* End of DGGBAL */
614 
615 } /* dggbal_ */
616 
617 #undef b_ref
618 #undef a_ref
619 
620 
621 #endif
int template_blas_scal(const integer *n, const Treal *da, Treal *dx, const integer *incx)
Definition: template_blas_scal.h:43
#define absMACRO(x)
Definition: template_blas_common.h:47
integer template_blas_idamax(const integer *n, const Treal *dx, const integer *incx)
Definition: template_blas_idamax.h:42
int integer
Definition: template_blas_common.h:40
#define b_ref(a_1, a_2)
Treal template_blas_lg10(Treal *x)
Definition: template_lapack_lamch.h:60
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
#define minMACRO(a, b)
Definition: template_blas_common.h:46
Treal template_lapack_d_sign(const Treal *a, const Treal *b)
Definition: template_lapack_lamch.h:48
#define a_ref(a_1, a_2)
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:146
int template_blas_swap(const integer *n, Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_swap.h:42
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:202
double template_lapack_pow_di(Treal *ap, integer *bp)
Definition: template_lapack_lamch.h:168
int template_blas_axpy(const integer *n, const Treal *da, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_axpy.h:43
int template_lapack_ggbal(const char *job, const integer *n, Treal *a, const integer *lda, Treal *b, const integer *ldb, integer *ilo, integer *ihi, Treal *lscale, Treal *rscale, Treal *work, integer *info)
Definition: template_lapack_ggbal.h:42
logical template_blas_lsame(const char *ca, const char *cb)
Definition: template_blas_common.cc:46
Treal template_blas_dot(const integer *n, const Treal *dx, const integer *incx, const Treal *dy, const integer *incy)
Definition: template_blas_dot.h:43