ergo
template_blas_trsm.h
Go to the documentation of this file.
1 /* Ergo, version 3.7, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2018 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31  * code. The source files in the template_lapack directory are modified
32  * versions of files originally distributed as CLAPACK, see the
33  * Copyright/license notice in the file template_lapack/COPYING.
34  */
35 
36 
37 #ifndef TEMPLATE_BLAS_TRSM_HEADER
38 #define TEMPLATE_BLAS_TRSM_HEADER
39 
40 #include "template_blas_common.h"
41 
42 template<class Treal>
43 int template_blas_trsm(const char *side, const char *uplo, const char *transa, const char *diag,
44  const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *
45  lda, Treal *b, const integer *ldb)
46 {
47  /* System generated locals */
48  integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
49  /* Local variables */
50  integer info;
51  Treal temp;
52  integer i__, j, k;
53  logical lside;
54  integer nrowa;
55  logical upper;
56  logical nounit;
57 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
58 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
59 /* Purpose
60  =======
61  DTRSM solves one of the matrix equations
62  op( A )*X = alpha*B, or X*op( A ) = alpha*B,
63  where alpha is a scalar, X and B are m by n matrices, A is a unit, or
64  non-unit, upper or lower triangular matrix and op( A ) is one of
65  op( A ) = A or op( A ) = A'.
66  The matrix X is overwritten on B.
67  Parameters
68  ==========
69  SIDE - CHARACTER*1.
70  On entry, SIDE specifies whether op( A ) appears on the left
71  or right of X as follows:
72  SIDE = 'L' or 'l' op( A )*X = alpha*B.
73  SIDE = 'R' or 'r' X*op( A ) = alpha*B.
74  Unchanged on exit.
75  UPLO - CHARACTER*1.
76  On entry, UPLO specifies whether the matrix A is an upper or
77  lower triangular matrix as follows:
78  UPLO = 'U' or 'u' A is an upper triangular matrix.
79  UPLO = 'L' or 'l' A is a lower triangular matrix.
80  Unchanged on exit.
81  TRANSA - CHARACTER*1.
82  On entry, TRANSA specifies the form of op( A ) to be used in
83  the matrix multiplication as follows:
84  TRANSA = 'N' or 'n' op( A ) = A.
85  TRANSA = 'T' or 't' op( A ) = A'.
86  TRANSA = 'C' or 'c' op( A ) = A'.
87  Unchanged on exit.
88  DIAG - CHARACTER*1.
89  On entry, DIAG specifies whether or not A is unit triangular
90  as follows:
91  DIAG = 'U' or 'u' A is assumed to be unit triangular.
92  DIAG = 'N' or 'n' A is not assumed to be unit
93  triangular.
94  Unchanged on exit.
95  M - INTEGER.
96  On entry, M specifies the number of rows of B. M must be at
97  least zero.
98  Unchanged on exit.
99  N - INTEGER.
100  On entry, N specifies the number of columns of B. N must be
101  at least zero.
102  Unchanged on exit.
103  ALPHA - DOUBLE PRECISION.
104  On entry, ALPHA specifies the scalar alpha. When alpha is
105  zero then A is not referenced and B need not be set before
106  entry.
107  Unchanged on exit.
108  A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m
109  when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'.
110  Before entry with UPLO = 'U' or 'u', the leading k by k
111  upper triangular part of the array A must contain the upper
112  triangular matrix and the strictly lower triangular part of
113  A is not referenced.
114  Before entry with UPLO = 'L' or 'l', the leading k by k
115  lower triangular part of the array A must contain the lower
116  triangular matrix and the strictly upper triangular part of
117  A is not referenced.
118  Note that when DIAG = 'U' or 'u', the diagonal elements of
119  A are not referenced either, but are assumed to be unity.
120  Unchanged on exit.
121  LDA - INTEGER.
122  On entry, LDA specifies the first dimension of A as declared
123  in the calling (sub) program. When SIDE = 'L' or 'l' then
124  LDA must be at least max( 1, m ), when SIDE = 'R' or 'r'
125  then LDA must be at least max( 1, n ).
126  Unchanged on exit.
127  B - DOUBLE PRECISION array of DIMENSION ( LDB, n ).
128  Before entry, the leading m by n part of the array B must
129  contain the right-hand side matrix B, and on exit is
130  overwritten by the solution matrix X.
131  LDB - INTEGER.
132  On entry, LDB specifies the first dimension of B as declared
133  in the calling (sub) program. LDB must be at least
134  max( 1, m ).
135  Unchanged on exit.
136  Level 3 Blas routine.
137  -- Written on 8-February-1989.
138  Jack Dongarra, Argonne National Laboratory.
139  Iain Duff, AERE Harwell.
140  Jeremy Du Croz, Numerical Algorithms Group Ltd.
141  Sven Hammarling, Numerical Algorithms Group Ltd.
142  Test the input parameters.
143  Parameter adjustments */
144  a_dim1 = *lda;
145  a_offset = 1 + a_dim1 * 1;
146  a -= a_offset;
147  b_dim1 = *ldb;
148  b_offset = 1 + b_dim1 * 1;
149  b -= b_offset;
150  /* Function Body */
151  lside = template_blas_lsame(side, "L");
152  if (lside) {
153  nrowa = *m;
154  } else {
155  nrowa = *n;
156  }
157  nounit = template_blas_lsame(diag, "N");
158  upper = template_blas_lsame(uplo, "U");
159  info = 0;
160  if (! lside && ! template_blas_lsame(side, "R")) {
161  info = 1;
162  } else if (! upper && ! template_blas_lsame(uplo, "L")) {
163  info = 2;
164  } else if (! template_blas_lsame(transa, "N") && ! template_blas_lsame(transa,
165  "T") && ! template_blas_lsame(transa, "C")) {
166  info = 3;
167  } else if (! template_blas_lsame(diag, "U") && ! template_blas_lsame(diag,
168  "N")) {
169  info = 4;
170  } else if (*m < 0) {
171  info = 5;
172  } else if (*n < 0) {
173  info = 6;
174  } else if (*lda < maxMACRO(1,nrowa)) {
175  info = 9;
176  } else if (*ldb < maxMACRO(1,*m)) {
177  info = 11;
178  }
179  if (info != 0) {
180  template_blas_erbla("TRSM ", &info);
181  return 0;
182  }
183 /* Quick return if possible. */
184  if (*n == 0) {
185  return 0;
186  }
187 /* And when alpha.eq.zero. */
188  if (*alpha == 0.) {
189  i__1 = *n;
190  for (j = 1; j <= i__1; ++j) {
191  i__2 = *m;
192  for (i__ = 1; i__ <= i__2; ++i__) {
193  b_ref(i__, j) = 0.;
194 /* L10: */
195  }
196 /* L20: */
197  }
198  return 0;
199  }
200 /* Start the operations. */
201  if (lside) {
202  if (template_blas_lsame(transa, "N")) {
203 /* Form B := alpha*inv( A )*B. */
204  if (upper) {
205  i__1 = *n;
206  for (j = 1; j <= i__1; ++j) {
207  if (*alpha != 1.) {
208  i__2 = *m;
209  for (i__ = 1; i__ <= i__2; ++i__) {
210  b_ref(i__, j) = *alpha * b_ref(i__, j);
211 /* L30: */
212  }
213  }
214  for (k = *m; k >= 1; --k) {
215  if (b_ref(k, j) != 0.) {
216  if (nounit) {
217  b_ref(k, j) = b_ref(k, j) / a_ref(k, k);
218  }
219  i__2 = k - 1;
220  for (i__ = 1; i__ <= i__2; ++i__) {
221  b_ref(i__, j) = b_ref(i__, j) - b_ref(k, j) *
222  a_ref(i__, k);
223 /* L40: */
224  }
225  }
226 /* L50: */
227  }
228 /* L60: */
229  }
230  } else {
231  i__1 = *n;
232  for (j = 1; j <= i__1; ++j) {
233  if (*alpha != 1.) {
234  i__2 = *m;
235  for (i__ = 1; i__ <= i__2; ++i__) {
236  b_ref(i__, j) = *alpha * b_ref(i__, j);
237 /* L70: */
238  }
239  }
240  i__2 = *m;
241  for (k = 1; k <= i__2; ++k) {
242  if (b_ref(k, j) != 0.) {
243  if (nounit) {
244  b_ref(k, j) = b_ref(k, j) / a_ref(k, k);
245  }
246  i__3 = *m;
247  for (i__ = k + 1; i__ <= i__3; ++i__) {
248  b_ref(i__, j) = b_ref(i__, j) - b_ref(k, j) *
249  a_ref(i__, k);
250 /* L80: */
251  }
252  }
253 /* L90: */
254  }
255 /* L100: */
256  }
257  }
258  } else {
259 /* Form B := alpha*inv( A' )*B. */
260  if (upper) {
261  i__1 = *n;
262  for (j = 1; j <= i__1; ++j) {
263  i__2 = *m;
264  for (i__ = 1; i__ <= i__2; ++i__) {
265  temp = *alpha * b_ref(i__, j);
266  i__3 = i__ - 1;
267  for (k = 1; k <= i__3; ++k) {
268  temp -= a_ref(k, i__) * b_ref(k, j);
269 /* L110: */
270  }
271  if (nounit) {
272  temp /= a_ref(i__, i__);
273  }
274  b_ref(i__, j) = temp;
275 /* L120: */
276  }
277 /* L130: */
278  }
279  } else {
280  i__1 = *n;
281  for (j = 1; j <= i__1; ++j) {
282  for (i__ = *m; i__ >= 1; --i__) {
283  temp = *alpha * b_ref(i__, j);
284  i__2 = *m;
285  for (k = i__ + 1; k <= i__2; ++k) {
286  temp -= a_ref(k, i__) * b_ref(k, j);
287 /* L140: */
288  }
289  if (nounit) {
290  temp /= a_ref(i__, i__);
291  }
292  b_ref(i__, j) = temp;
293 /* L150: */
294  }
295 /* L160: */
296  }
297  }
298  }
299  } else {
300  if (template_blas_lsame(transa, "N")) {
301 /* Form B := alpha*B*inv( A ). */
302  if (upper) {
303  i__1 = *n;
304  for (j = 1; j <= i__1; ++j) {
305  if (*alpha != 1.) {
306  i__2 = *m;
307  for (i__ = 1; i__ <= i__2; ++i__) {
308  b_ref(i__, j) = *alpha * b_ref(i__, j);
309 /* L170: */
310  }
311  }
312  i__2 = j - 1;
313  for (k = 1; k <= i__2; ++k) {
314  if (a_ref(k, j) != 0.) {
315  i__3 = *m;
316  for (i__ = 1; i__ <= i__3; ++i__) {
317  b_ref(i__, j) = b_ref(i__, j) - a_ref(k, j) *
318  b_ref(i__, k);
319 /* L180: */
320  }
321  }
322 /* L190: */
323  }
324  if (nounit) {
325  temp = 1. / a_ref(j, j);
326  i__2 = *m;
327  for (i__ = 1; i__ <= i__2; ++i__) {
328  b_ref(i__, j) = temp * b_ref(i__, j);
329 /* L200: */
330  }
331  }
332 /* L210: */
333  }
334  } else {
335  for (j = *n; j >= 1; --j) {
336  if (*alpha != 1.) {
337  i__1 = *m;
338  for (i__ = 1; i__ <= i__1; ++i__) {
339  b_ref(i__, j) = *alpha * b_ref(i__, j);
340 /* L220: */
341  }
342  }
343  i__1 = *n;
344  for (k = j + 1; k <= i__1; ++k) {
345  if (a_ref(k, j) != 0.) {
346  i__2 = *m;
347  for (i__ = 1; i__ <= i__2; ++i__) {
348  b_ref(i__, j) = b_ref(i__, j) - a_ref(k, j) *
349  b_ref(i__, k);
350 /* L230: */
351  }
352  }
353 /* L240: */
354  }
355  if (nounit) {
356  temp = 1. / a_ref(j, j);
357  i__1 = *m;
358  for (i__ = 1; i__ <= i__1; ++i__) {
359  b_ref(i__, j) = temp * b_ref(i__, j);
360 /* L250: */
361  }
362  }
363 /* L260: */
364  }
365  }
366  } else {
367 /* Form B := alpha*B*inv( A' ). */
368  if (upper) {
369  for (k = *n; k >= 1; --k) {
370  if (nounit) {
371  temp = 1. / a_ref(k, k);
372  i__1 = *m;
373  for (i__ = 1; i__ <= i__1; ++i__) {
374  b_ref(i__, k) = temp * b_ref(i__, k);
375 /* L270: */
376  }
377  }
378  i__1 = k - 1;
379  for (j = 1; j <= i__1; ++j) {
380  if (a_ref(j, k) != 0.) {
381  temp = a_ref(j, k);
382  i__2 = *m;
383  for (i__ = 1; i__ <= i__2; ++i__) {
384  b_ref(i__, j) = b_ref(i__, j) - temp * b_ref(
385  i__, k);
386 /* L280: */
387  }
388  }
389 /* L290: */
390  }
391  if (*alpha != 1.) {
392  i__1 = *m;
393  for (i__ = 1; i__ <= i__1; ++i__) {
394  b_ref(i__, k) = *alpha * b_ref(i__, k);
395 /* L300: */
396  }
397  }
398 /* L310: */
399  }
400  } else {
401  i__1 = *n;
402  for (k = 1; k <= i__1; ++k) {
403  if (nounit) {
404  temp = 1. / a_ref(k, k);
405  i__2 = *m;
406  for (i__ = 1; i__ <= i__2; ++i__) {
407  b_ref(i__, k) = temp * b_ref(i__, k);
408 /* L320: */
409  }
410  }
411  i__2 = *n;
412  for (j = k + 1; j <= i__2; ++j) {
413  if (a_ref(j, k) != 0.) {
414  temp = a_ref(j, k);
415  i__3 = *m;
416  for (i__ = 1; i__ <= i__3; ++i__) {
417  b_ref(i__, j) = b_ref(i__, j) - temp * b_ref(
418  i__, k);
419 /* L330: */
420  }
421  }
422 /* L340: */
423  }
424  if (*alpha != 1.) {
425  i__2 = *m;
426  for (i__ = 1; i__ <= i__2; ++i__) {
427  b_ref(i__, k) = *alpha * b_ref(i__, k);
428 /* L350: */
429  }
430  }
431 /* L360: */
432  }
433  }
434  }
435  }
436  return 0;
437 /* End of DTRSM . */
438 } /* dtrsm_ */
439 #undef b_ref
440 #undef a_ref
441 
442 #endif
#define a_ref(a_1, a_2)
int integer
Definition: template_blas_common.h:40
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
#define b_ref(a_1, a_2)
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:146
int template_blas_trsm(const char *side, const char *uplo, const char *transa, const char *diag, const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *lda, Treal *b, const integer *ldb)
Definition: template_blas_trsm.h:43
bool logical
Definition: template_blas_common.h:41
side
Definition: Matrix.h:75
logical template_blas_lsame(const char *ca, const char *cb)
Definition: template_blas_common.cc:46