ergo
template_lapack_ormqr.h
Go to the documentation of this file.
1 /* Ergo, version 3.7, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2018 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31  * code. The source files in the template_lapack directory are modified
32  * versions of files originally distributed as CLAPACK, see the
33  * Copyright/license notice in the file template_lapack/COPYING.
34  */
35 
36 
37 #ifndef TEMPLATE_LAPACK_ORMQR_HEADER
38 #define TEMPLATE_LAPACK_ORMQR_HEADER
39 
40 
41 template<class Treal>
42 int template_lapack_ormqr(char *side, char *trans, const integer *m, const integer *n,
43  const integer *k, Treal *a, const integer *lda, const Treal *tau, Treal *
44  c__, const integer *ldc, Treal *work, const integer *lwork, integer *info)
45 {
46 /* -- LAPACK routine (version 3.0) --
47  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
48  Courant Institute, Argonne National Lab, and Rice University
49  June 30, 1999
50 
51 
52  Purpose
53  =======
54 
55  DORMQR overwrites the general real M-by-N matrix C with
56 
57  SIDE = 'L' SIDE = 'R'
58  TRANS = 'N': Q * C C * Q
59  TRANS = 'T': Q**T * C C * Q**T
60 
61  where Q is a real orthogonal matrix defined as the product of k
62  elementary reflectors
63 
64  Q = H(1) H(2) . . . H(k)
65 
66  as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N
67  if SIDE = 'R'.
68 
69  Arguments
70  =========
71 
72  SIDE (input) CHARACTER*1
73  = 'L': apply Q or Q**T from the Left;
74  = 'R': apply Q or Q**T from the Right.
75 
76  TRANS (input) CHARACTER*1
77  = 'N': No transpose, apply Q;
78  = 'T': Transpose, apply Q**T.
79 
80  M (input) INTEGER
81  The number of rows of the matrix C. M >= 0.
82 
83  N (input) INTEGER
84  The number of columns of the matrix C. N >= 0.
85 
86  K (input) INTEGER
87  The number of elementary reflectors whose product defines
88  the matrix Q.
89  If SIDE = 'L', M >= K >= 0;
90  if SIDE = 'R', N >= K >= 0.
91 
92  A (input) DOUBLE PRECISION array, dimension (LDA,K)
93  The i-th column must contain the vector which defines the
94  elementary reflector H(i), for i = 1,2,...,k, as returned by
95  DGEQRF in the first k columns of its array argument A.
96  A is modified by the routine but restored on exit.
97 
98  LDA (input) INTEGER
99  The leading dimension of the array A.
100  If SIDE = 'L', LDA >= max(1,M);
101  if SIDE = 'R', LDA >= max(1,N).
102 
103  TAU (input) DOUBLE PRECISION array, dimension (K)
104  TAU(i) must contain the scalar factor of the elementary
105  reflector H(i), as returned by DGEQRF.
106 
107  C (input/output) DOUBLE PRECISION array, dimension (LDC,N)
108  On entry, the M-by-N matrix C.
109  On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
110 
111  LDC (input) INTEGER
112  The leading dimension of the array C. LDC >= max(1,M).
113 
114  WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
115  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
116 
117  LWORK (input) INTEGER
118  The dimension of the array WORK.
119  If SIDE = 'L', LWORK >= max(1,N);
120  if SIDE = 'R', LWORK >= max(1,M).
121  For optimum performance LWORK >= N*NB if SIDE = 'L', and
122  LWORK >= M*NB if SIDE = 'R', where NB is the optimal
123  blocksize.
124 
125  If LWORK = -1, then a workspace query is assumed; the routine
126  only calculates the optimal size of the WORK array, returns
127  this value as the first entry of the WORK array, and no error
128  message related to LWORK is issued by XERBLA.
129 
130  INFO (output) INTEGER
131  = 0: successful exit
132  < 0: if INFO = -i, the i-th argument had an illegal value
133 
134  =====================================================================
135 
136 
137  Test the input arguments
138 
139  Parameter adjustments */
140  /* Table of constant values */
141  integer c__1 = 1;
142  integer c_n1 = -1;
143  integer c__2 = 2;
144  integer c__65 = 65;
145 
146  /* System generated locals */
147  address a__1[2];
148  integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4,
149  i__5;
150  char ch__1[2];
151  /* Local variables */
152  logical left;
153  integer i__;
154  Treal t[4160] /* was [65][64] */;
155  integer nbmin, iinfo, i1, i2, i3;
156  integer ib, ic, jc, nb, mi, ni;
157  integer nq, nw;
158  logical notran;
159  integer ldwork, lwkopt;
160  logical lquery;
161  integer iws;
162 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
163 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
164 
165 
166  a_dim1 = *lda;
167  a_offset = 1 + a_dim1 * 1;
168  a -= a_offset;
169  --tau;
170  c_dim1 = *ldc;
171  c_offset = 1 + c_dim1 * 1;
172  c__ -= c_offset;
173  --work;
174 
175  /* Initialization added by Elias to get rid of compiler warnings. */
176  lwkopt = 0;
177  nb = 0;
178  /* Function Body */
179  *info = 0;
180  left = template_blas_lsame(side, "L");
181  notran = template_blas_lsame(trans, "N");
182  lquery = *lwork == -1;
183 
184 /* NQ is the order of Q and NW is the minimum dimension of WORK */
185 
186  if (left) {
187  nq = *m;
188  nw = *n;
189  } else {
190  nq = *n;
191  nw = *m;
192  }
193  if (! left && ! template_blas_lsame(side, "R")) {
194  *info = -1;
195  } else if (! notran && ! template_blas_lsame(trans, "T")) {
196  *info = -2;
197  } else if (*m < 0) {
198  *info = -3;
199  } else if (*n < 0) {
200  *info = -4;
201  } else if (*k < 0 || *k > nq) {
202  *info = -5;
203  } else if (*lda < maxMACRO(1,nq)) {
204  *info = -7;
205  } else if (*ldc < maxMACRO(1,*m)) {
206  *info = -10;
207  } else if (*lwork < maxMACRO(1,nw) && ! lquery) {
208  *info = -12;
209  }
210 
211  if (*info == 0) {
212 
213 /* Determine the block size. NB may be at most NBMAX, where NBMAX
214  is used to define the local array T.
215 
216  Computing MIN
217  Writing concatenation */
218  i__3[0] = 1, a__1[0] = side;
219  i__3[1] = 1, a__1[1] = trans;
220  template_blas_s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
221  i__1 = 64, i__2 = template_lapack_ilaenv(&c__1, "DORMQR", ch__1, m, n, k, &c_n1, (
222  ftnlen)6, (ftnlen)2);
223  nb = minMACRO(i__1,i__2);
224  lwkopt = maxMACRO(1,nw) * nb;
225  work[1] = (Treal) lwkopt;
226  }
227 
228  if (*info != 0) {
229  i__1 = -(*info);
230  template_blas_erbla("ORMQR ", &i__1);
231  return 0;
232  } else if (lquery) {
233  return 0;
234  }
235 
236 /* Quick return if possible */
237 
238  if (*m == 0 || *n == 0 || *k == 0) {
239  work[1] = 1.;
240  return 0;
241  }
242 
243  nbmin = 2;
244  ldwork = nw;
245  if (nb > 1 && nb < *k) {
246  iws = nw * nb;
247  if (*lwork < iws) {
248  nb = *lwork / ldwork;
249 /* Computing MAX
250  Writing concatenation */
251  i__3[0] = 1, a__1[0] = side;
252  i__3[1] = 1, a__1[1] = trans;
253  template_blas_s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
254  i__1 = 2, i__2 = template_lapack_ilaenv(&c__2, "DORMQR", ch__1, m, n, k, &c_n1, (
255  ftnlen)6, (ftnlen)2);
256  nbmin = maxMACRO(i__1,i__2);
257  }
258  } else {
259  iws = nw;
260  }
261 
262  if (nb < nbmin || nb >= *k) {
263 
264 /* Use unblocked code */
265 
266  template_lapack_orm2r(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
267  c_offset], ldc, &work[1], &iinfo);
268  } else {
269 
270 /* Use blocked code */
271 
272  if ( ( left && ! notran ) || ( ! left && notran ) ) {
273  i1 = 1;
274  i2 = *k;
275  i3 = nb;
276  } else {
277  i1 = (*k - 1) / nb * nb + 1;
278  i2 = 1;
279  i3 = -nb;
280  }
281 
282  if (left) {
283  ni = *n;
284  jc = 1;
285  } else {
286  mi = *m;
287  ic = 1;
288  }
289 
290  i__1 = i2;
291  i__2 = i3;
292  for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
293 /* Computing MIN */
294  i__4 = nb, i__5 = *k - i__ + 1;
295  ib = minMACRO(i__4,i__5);
296 
297 /* Form the triangular factor of the block reflector
298  H = H(i) H(i+1) . . . H(i+ib-1) */
299 
300  i__4 = nq - i__ + 1;
301  template_lapack_larft("Forward", "Columnwise", &i__4, &ib, &a_ref(i__, i__),
302  lda, &tau[i__], t, &c__65);
303  if (left) {
304 
305 /* H or H' is applied to C(i:m,1:n) */
306 
307  mi = *m - i__ + 1;
308  ic = i__;
309  } else {
310 
311 /* H or H' is applied to C(1:m,i:n) */
312 
313  ni = *n - i__ + 1;
314  jc = i__;
315  }
316 
317 /* Apply H or H' */
318 
319  template_lapack_larfb(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &
320  a_ref(i__, i__), lda, t, &c__65, &c___ref(ic, jc), ldc, &
321  work[1], &ldwork);
322 /* L10: */
323  }
324  }
325  work[1] = (Treal) lwkopt;
326  return 0;
327 
328 /* End of DORMQR */
329 
330 } /* dormqr_ */
331 
332 #undef c___ref
333 #undef a_ref
334 
335 
336 #endif
#define a_ref(a_1, a_2)
int integer
Definition: template_blas_common.h:40
integer template_lapack_ilaenv(const integer *ispec, const char *name__, const char *opts, const integer *n1, const integer *n2, const integer *n3, const integer *n4, ftnlen name_len, ftnlen opts_len)
Definition: template_lapack_common.cc:281
char * address
Definition: template_blas_common.h:43
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
#define minMACRO(a, b)
Definition: template_blas_common.h:46
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:146
int template_lapack_larfb(const char *side, const char *trans, const char *direct, const char *storev, const integer *m, const integer *n, const integer *k, const Treal *v, const integer *ldv, const Treal *t, const integer *ldt, Treal *c__, const integer *ldc, Treal *work, const integer *ldwork)
Definition: template_lapack_larfb.h:42
#define c___ref(a_1, a_2)
void template_blas_s_cat(char *lp, char *rpp[], ftnlen rnp[], ftnlen *np, ftnlen ll)
Definition: template_blas_common.cc:204
int template_lapack_ormqr(char *side, char *trans, const integer *m, const integer *n, const integer *k, Treal *a, const integer *lda, const Treal *tau, Treal *c__, const integer *ldc, Treal *work, const integer *lwork, integer *info)
Definition: template_lapack_ormqr.h:42
int template_lapack_orm2r(const char *side, const char *trans, const integer *m, const integer *n, const integer *k, Treal *a, const integer *lda, const Treal *tau, Treal *c__, const integer *ldc, Treal *work, integer *info)
Definition: template_lapack_orm2r.h:42
bool logical
Definition: template_blas_common.h:41
side
Definition: Matrix.h:75
int ftnlen
Definition: template_blas_common.h:42
Definition: Matrix.h:75
logical template_blas_lsame(const char *ca, const char *cb)
Definition: template_blas_common.cc:46
int template_lapack_larft(const char *direct, const char *storev, const integer *n, const integer *k, Treal *v, const integer *ldv, const Treal *tau, Treal *t, const integer *ldt)
Definition: template_lapack_larft.h:42