ergo
template_lapack_larrj.h
Go to the documentation of this file.
1 /* Ergo, version 3.7, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2018 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31  * code. The source files in the template_lapack directory are modified
32  * versions of files originally distributed as CLAPACK, see the
33  * Copyright/license notice in the file template_lapack/COPYING.
34  */
35 
36 
37 #ifndef TEMPLATE_LAPACK_LARRJ_HEADER
38 #define TEMPLATE_LAPACK_LARRJ_HEADER
39 
40 template<class Treal>
41 int template_lapack_larrj(integer *n, Treal *d__, Treal *e2,
42  integer *ifirst, integer *ilast, Treal *rtol, integer *offset,
43  Treal *w, Treal *werr, Treal *work, integer *iwork,
44  Treal *pivmin, Treal *spdiam, integer *info)
45 {
46  /* System generated locals */
47  integer i__1, i__2;
48  Treal d__1, d__2;
49 
50 
51  /* Local variables */
52  integer i__, j, k, p;
53  Treal s;
54  integer i1, i2, ii;
55  Treal fac, mid;
56  integer cnt;
57  Treal tmp, left;
58  integer iter, nint, prev, next, savi1;
59  Treal right, width, dplus;
60  integer olnint, maxitr;
61 
62 
63 /* -- LAPACK auxiliary routine (version 3.2) -- */
64 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
65 /* November 2006 */
66 
67 /* .. Scalar Arguments .. */
68 /* .. */
69 /* .. Array Arguments .. */
70 /* .. */
71 
72 /* Purpose */
73 /* ======= */
74 
75 /* Given the initial eigenvalue approximations of T, DLARRJ */
76 /* does bisection to refine the eigenvalues of T, */
77 /* W( IFIRST-OFFSET ) through W( ILAST-OFFSET ), to more accuracy. Initial */
78 /* guesses for these eigenvalues are input in W, the corresponding estimate */
79 /* of the error in these guesses in WERR. During bisection, intervals */
80 /* [left, right] are maintained by storing their mid-points and */
81 /* semi-widths in the arrays W and WERR respectively. */
82 
83 /* Arguments */
84 /* ========= */
85 
86 /* N (input) INTEGER */
87 /* The order of the matrix. */
88 
89 /* D (input) DOUBLE PRECISION array, dimension (N) */
90 /* The N diagonal elements of T. */
91 
92 /* E2 (input) DOUBLE PRECISION array, dimension (N-1) */
93 /* The Squares of the (N-1) subdiagonal elements of T. */
94 
95 /* IFIRST (input) INTEGER */
96 /* The index of the first eigenvalue to be computed. */
97 
98 /* ILAST (input) INTEGER */
99 /* The index of the last eigenvalue to be computed. */
100 
101 /* RTOL (input) DOUBLE PRECISION */
102 /* Tolerance for the convergence of the bisection intervals. */
103 /* An interval [LEFT,RIGHT] has converged if */
104 /* RIGHT-LEFT.LT.RTOL*MAX(|LEFT|,|RIGHT|). */
105 
106 /* OFFSET (input) INTEGER */
107 /* Offset for the arrays W and WERR, i.e., the IFIRST-OFFSET */
108 /* through ILAST-OFFSET elements of these arrays are to be used. */
109 
110 /* W (input/output) DOUBLE PRECISION array, dimension (N) */
111 /* On input, W( IFIRST-OFFSET ) through W( ILAST-OFFSET ) are */
112 /* estimates of the eigenvalues of L D L^T indexed IFIRST through */
113 /* ILAST. */
114 /* On output, these estimates are refined. */
115 
116 /* WERR (input/output) DOUBLE PRECISION array, dimension (N) */
117 /* On input, WERR( IFIRST-OFFSET ) through WERR( ILAST-OFFSET ) are */
118 /* the errors in the estimates of the corresponding elements in W. */
119 /* On output, these errors are refined. */
120 
121 /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */
122 /* Workspace. */
123 
124 /* IWORK (workspace) INTEGER array, dimension (2*N) */
125 /* Workspace. */
126 
127 /* PIVMIN (input) DOUBLE PRECISION */
128 /* The minimum pivot in the Sturm sequence for T. */
129 
130 /* SPDIAM (input) DOUBLE PRECISION */
131 /* The spectral diameter of T. */
132 
133 /* INFO (output) INTEGER */
134 /* Error flag. */
135 
136 /* Further Details */
137 /* =============== */
138 
139 /* Based on contributions by */
140 /* Beresford Parlett, University of California, Berkeley, USA */
141 /* Jim Demmel, University of California, Berkeley, USA */
142 /* Inderjit Dhillon, University of Texas, Austin, USA */
143 /* Osni Marques, LBNL/NERSC, USA */
144 /* Christof Voemel, University of California, Berkeley, USA */
145 
146 /* ===================================================================== */
147 
148 /* .. Parameters .. */
149 /* .. */
150 /* .. Local Scalars .. */
151 
152 /* .. */
153 /* .. Intrinsic Functions .. */
154 /* .. */
155 /* .. Executable Statements .. */
156 
157  /* Parameter adjustments */
158  --iwork;
159  --work;
160  --werr;
161  --w;
162  --e2;
163  --d__;
164 
165  /* Function Body */
166  *info = 0;
167 
168  maxitr = (integer) ((template_blas_log(*spdiam + *pivmin) - template_blas_log(*pivmin)) / template_blas_log(2.)) +
169  2;
170 
171 /* Initialize unconverged intervals in [ WORK(2*I-1), WORK(2*I) ]. */
172 /* The Sturm Count, Count( WORK(2*I-1) ) is arranged to be I-1, while */
173 /* Count( WORK(2*I) ) is stored in IWORK( 2*I ). The integer IWORK( 2*I-1 ) */
174 /* for an unconverged interval is set to the index of the next unconverged */
175 /* interval, and is -1 or 0 for a converged interval. Thus a linked */
176 /* list of unconverged intervals is set up. */
177 
178  i1 = *ifirst;
179  i2 = *ilast;
180 /* The number of unconverged intervals */
181  nint = 0;
182 /* The last unconverged interval found */
183  prev = 0;
184  i__1 = i2;
185  for (i__ = i1; i__ <= i__1; ++i__) {
186  k = i__ << 1;
187  ii = i__ - *offset;
188  left = w[ii] - werr[ii];
189  mid = w[ii];
190  right = w[ii] + werr[ii];
191  width = right - mid;
192 /* Computing MAX */
193  d__1 = absMACRO(left), d__2 = absMACRO(right);
194  tmp = maxMACRO(d__1,d__2);
195 /* The following test prevents the test of converged intervals */
196  if (width < *rtol * tmp) {
197 /* This interval has already converged and does not need refinement. */
198 /* (Note that the gaps might change through refining the */
199 /* eigenvalues, however, they can only get bigger.) */
200 /* Remove it from the list. */
201  iwork[k - 1] = -1;
202 /* Make sure that I1 always points to the first unconverged interval */
203  if (i__ == i1 && i__ < i2) {
204  i1 = i__ + 1;
205  }
206  if (prev >= i1 && i__ <= i2) {
207  iwork[(prev << 1) - 1] = i__ + 1;
208  }
209  } else {
210 /* unconverged interval found */
211  prev = i__;
212 /* Make sure that [LEFT,RIGHT] contains the desired eigenvalue */
213 
214 /* Do while( CNT(LEFT).GT.I-1 ) */
215 
216  fac = 1.;
217 L20:
218  cnt = 0;
219  s = left;
220  dplus = d__[1] - s;
221  if (dplus < 0.) {
222  ++cnt;
223  }
224  i__2 = *n;
225  for (j = 2; j <= i__2; ++j) {
226  dplus = d__[j] - s - e2[j - 1] / dplus;
227  if (dplus < 0.) {
228  ++cnt;
229  }
230 /* L30: */
231  }
232  if (cnt > i__ - 1) {
233  left -= werr[ii] * fac;
234  fac *= 2.;
235  goto L20;
236  }
237 
238 /* Do while( CNT(RIGHT).LT.I ) */
239 
240  fac = 1.;
241 L50:
242  cnt = 0;
243  s = right;
244  dplus = d__[1] - s;
245  if (dplus < 0.) {
246  ++cnt;
247  }
248  i__2 = *n;
249  for (j = 2; j <= i__2; ++j) {
250  dplus = d__[j] - s - e2[j - 1] / dplus;
251  if (dplus < 0.) {
252  ++cnt;
253  }
254 /* L60: */
255  }
256  if (cnt < i__) {
257  right += werr[ii] * fac;
258  fac *= 2.;
259  goto L50;
260  }
261  ++nint;
262  iwork[k - 1] = i__ + 1;
263  iwork[k] = cnt;
264  }
265  work[k - 1] = left;
266  work[k] = right;
267 /* L75: */
268  }
269  savi1 = i1;
270 
271 /* Do while( NINT.GT.0 ), i.e. there are still unconverged intervals */
272 /* and while (ITER.LT.MAXITR) */
273 
274  iter = 0;
275 L80:
276  prev = i1 - 1;
277  i__ = i1;
278  olnint = nint;
279  i__1 = olnint;
280  for (p = 1; p <= i__1; ++p) {
281  k = i__ << 1;
282  ii = i__ - *offset;
283  next = iwork[k - 1];
284  left = work[k - 1];
285  right = work[k];
286  mid = (left + right) * .5;
287 /* semiwidth of interval */
288  width = right - mid;
289 /* Computing MAX */
290  d__1 = absMACRO(left), d__2 = absMACRO(right);
291  tmp = maxMACRO(d__1,d__2);
292  if (width < *rtol * tmp || iter == maxitr) {
293 /* reduce number of unconverged intervals */
294  --nint;
295 /* Mark interval as converged. */
296  iwork[k - 1] = 0;
297  if (i1 == i__) {
298  i1 = next;
299  } else {
300 /* Prev holds the last unconverged interval previously examined */
301  if (prev >= i1) {
302  iwork[(prev << 1) - 1] = next;
303  }
304  }
305  i__ = next;
306  goto L100;
307  }
308  prev = i__;
309 
310 /* Perform one bisection step */
311 
312  cnt = 0;
313  s = mid;
314  dplus = d__[1] - s;
315  if (dplus < 0.) {
316  ++cnt;
317  }
318  i__2 = *n;
319  for (j = 2; j <= i__2; ++j) {
320  dplus = d__[j] - s - e2[j - 1] / dplus;
321  if (dplus < 0.) {
322  ++cnt;
323  }
324 /* L90: */
325  }
326  if (cnt <= i__ - 1) {
327  work[k - 1] = mid;
328  } else {
329  work[k] = mid;
330  }
331  i__ = next;
332 L100:
333  ;
334  }
335  ++iter;
336 /* do another loop if there are still unconverged intervals */
337 /* However, in the last iteration, all intervals are accepted */
338 /* since this is the best we can do. */
339  if (nint > 0 && iter <= maxitr) {
340  goto L80;
341  }
342 
343 
344 /* At this point, all the intervals have converged */
345  i__1 = *ilast;
346  for (i__ = savi1; i__ <= i__1; ++i__) {
347  k = i__ << 1;
348  ii = i__ - *offset;
349 /* All intervals marked by '0' have been refined. */
350  if (iwork[k - 1] == 0) {
351  w[ii] = (work[k - 1] + work[k]) * .5;
352  werr[ii] = work[k] - w[ii];
353  }
354 /* L110: */
355  }
356 
357  return 0;
358 
359 /* End of DLARRJ */
360 
361 } /* dlarrj_ */
362 
363 #endif
#define absMACRO(x)
Definition: template_blas_common.h:47
Definition: Matrix.h:75
int integer
Definition: template_blas_common.h:40
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
Treal template_blas_log(Treal x)
int template_lapack_larrj(integer *n, Treal *d__, Treal *e2, integer *ifirst, integer *ilast, Treal *rtol, integer *offset, Treal *w, Treal *werr, Treal *work, integer *iwork, Treal *pivmin, Treal *spdiam, integer *info)
Definition: template_lapack_larrj.h:41
Definition: Matrix.h:75