ergo
template_blas_gemm.h
Go to the documentation of this file.
1 /* Ergo, version 3.7, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2018 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31  * code. The source files in the template_lapack directory are modified
32  * versions of files originally distributed as CLAPACK, see the
33  * Copyright/license notice in the file template_lapack/COPYING.
34  */
35 
36 
37 #ifndef TEMPLATE_BLAS_GEMM_HEADER
38 #define TEMPLATE_BLAS_GEMM_HEADER
39 
40 #include "template_blas_common.h"
41 
42 template<class Treal>
43 int template_blas_gemm(const char *transa, const char *transb, const integer *m, const integer *
44  n, const integer *k, const Treal *alpha, const Treal *a, const integer *lda,
45  const Treal *b, const integer *ldb, const Treal *beta, Treal *c__,
46  const integer *ldc)
47 {
48  /* System generated locals */
49  integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
50  i__3;
51  /* Local variables */
52  integer info;
53  logical nota, notb;
54  Treal temp;
55  integer i__, j, l;
56  integer nrowa, nrowb;
57 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
58 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
59 #define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
60 /* Purpose
61  =======
62  DGEMM performs one of the matrix-matrix operations
63  C := alpha*op( A )*op( B ) + beta*C,
64  where op( X ) is one of
65  op( X ) = X or op( X ) = X',
66  alpha and beta are scalars, and A, B and C are matrices, with op( A )
67  an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
68  Parameters
69  ==========
70  TRANSA - CHARACTER*1.
71  On entry, TRANSA specifies the form of op( A ) to be used in
72  the matrix multiplication as follows:
73  TRANSA = 'N' or 'n', op( A ) = A.
74  TRANSA = 'T' or 't', op( A ) = A'.
75  TRANSA = 'C' or 'c', op( A ) = A'.
76  Unchanged on exit.
77  TRANSB - CHARACTER*1.
78  On entry, TRANSB specifies the form of op( B ) to be used in
79  the matrix multiplication as follows:
80  TRANSB = 'N' or 'n', op( B ) = B.
81  TRANSB = 'T' or 't', op( B ) = B'.
82  TRANSB = 'C' or 'c', op( B ) = B'.
83  Unchanged on exit.
84  M - INTEGER.
85  On entry, M specifies the number of rows of the matrix
86  op( A ) and of the matrix C. M must be at least zero.
87  Unchanged on exit.
88  N - INTEGER.
89  On entry, N specifies the number of columns of the matrix
90  op( B ) and the number of columns of the matrix C. N must be
91  at least zero.
92  Unchanged on exit.
93  K - INTEGER.
94  On entry, K specifies the number of columns of the matrix
95  op( A ) and the number of rows of the matrix op( B ). K must
96  be at least zero.
97  Unchanged on exit.
98  ALPHA - DOUBLE PRECISION.
99  On entry, ALPHA specifies the scalar alpha.
100  Unchanged on exit.
101  A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
102  k when TRANSA = 'N' or 'n', and is m otherwise.
103  Before entry with TRANSA = 'N' or 'n', the leading m by k
104  part of the array A must contain the matrix A, otherwise
105  the leading k by m part of the array A must contain the
106  matrix A.
107  Unchanged on exit.
108  LDA - INTEGER.
109  On entry, LDA specifies the first dimension of A as declared
110  in the calling (sub) program. When TRANSA = 'N' or 'n' then
111  LDA must be at least max( 1, m ), otherwise LDA must be at
112  least max( 1, k ).
113  Unchanged on exit.
114  B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is
115  n when TRANSB = 'N' or 'n', and is k otherwise.
116  Before entry with TRANSB = 'N' or 'n', the leading k by n
117  part of the array B must contain the matrix B, otherwise
118  the leading n by k part of the array B must contain the
119  matrix B.
120  Unchanged on exit.
121  LDB - INTEGER.
122  On entry, LDB specifies the first dimension of B as declared
123  in the calling (sub) program. When TRANSB = 'N' or 'n' then
124  LDB must be at least max( 1, k ), otherwise LDB must be at
125  least max( 1, n ).
126  Unchanged on exit.
127  BETA - DOUBLE PRECISION.
128  On entry, BETA specifies the scalar beta. When BETA is
129  supplied as zero then C need not be set on input.
130  Unchanged on exit.
131  C - DOUBLE PRECISION array of DIMENSION ( LDC, n ).
132  Before entry, the leading m by n part of the array C must
133  contain the matrix C, except when beta is zero, in which
134  case C need not be set on entry.
135  On exit, the array C is overwritten by the m by n matrix
136  ( alpha*op( A )*op( B ) + beta*C ).
137  LDC - INTEGER.
138  On entry, LDC specifies the first dimension of C as declared
139  in the calling (sub) program. LDC must be at least
140  max( 1, m ).
141  Unchanged on exit.
142  Level 3 Blas routine.
143  -- Written on 8-February-1989.
144  Jack Dongarra, Argonne National Laboratory.
145  Iain Duff, AERE Harwell.
146  Jeremy Du Croz, Numerical Algorithms Group Ltd.
147  Sven Hammarling, Numerical Algorithms Group Ltd.
148  Set NOTA and NOTB as true if A and B respectively are not
149  transposed and set NROWA, NCOLA and NROWB as the number of rows
150  and columns of A and the number of rows of B respectively.
151  Parameter adjustments */
152  a_dim1 = *lda;
153  a_offset = 1 + a_dim1 * 1;
154  a -= a_offset;
155  b_dim1 = *ldb;
156  b_offset = 1 + b_dim1 * 1;
157  b -= b_offset;
158  c_dim1 = *ldc;
159  c_offset = 1 + c_dim1 * 1;
160  c__ -= c_offset;
161  /* Function Body */
162  nota = template_blas_lsame(transa, "N");
163  notb = template_blas_lsame(transb, "N");
164  if (nota) {
165  nrowa = *m;
166  } else {
167  nrowa = *k;
168  }
169  if (notb) {
170  nrowb = *k;
171  } else {
172  nrowb = *n;
173  }
174 /* Test the input parameters. */
175  info = 0;
176  if (! nota && ! template_blas_lsame(transa, "C") && ! template_blas_lsame(
177  transa, "T")) {
178  info = 1;
179  } else if (! notb && ! template_blas_lsame(transb, "C") && !
180  template_blas_lsame(transb, "T")) {
181  info = 2;
182  } else if (*m < 0) {
183  info = 3;
184  } else if (*n < 0) {
185  info = 4;
186  } else if (*k < 0) {
187  info = 5;
188  } else if (*lda < maxMACRO(1,nrowa)) {
189  info = 8;
190  } else if (*ldb < maxMACRO(1,nrowb)) {
191  info = 10;
192  } else if (*ldc < maxMACRO(1,*m)) {
193  info = 13;
194  }
195  if (info != 0) {
196  template_blas_erbla("DGEMM ", &info);
197  return 0;
198  }
199 /* Quick return if possible. */
200  if (*m == 0 || *n == 0 || ( (*alpha == 0. || *k == 0) && *beta == 1.) ) {
201  return 0;
202  }
203 /* And if alpha.eq.zero. */
204  if (*alpha == 0.) {
205  if (*beta == 0.) {
206  i__1 = *n;
207  for (j = 1; j <= i__1; ++j) {
208  i__2 = *m;
209  for (i__ = 1; i__ <= i__2; ++i__) {
210  c___ref(i__, j) = 0.;
211 /* L10: */
212  }
213 /* L20: */
214  }
215  } else {
216  i__1 = *n;
217  for (j = 1; j <= i__1; ++j) {
218  i__2 = *m;
219  for (i__ = 1; i__ <= i__2; ++i__) {
220  c___ref(i__, j) = *beta * c___ref(i__, j);
221 /* L30: */
222  }
223 /* L40: */
224  }
225  }
226  return 0;
227  }
228 /* Start the operations. */
229  if (notb) {
230  if (nota) {
231 /* Form C := alpha*A*B + beta*C. */
232  i__1 = *n;
233  for (j = 1; j <= i__1; ++j) {
234  if (*beta == 0.) {
235  i__2 = *m;
236  for (i__ = 1; i__ <= i__2; ++i__) {
237  c___ref(i__, j) = 0.;
238 /* L50: */
239  }
240  } else if (*beta != 1.) {
241  i__2 = *m;
242  for (i__ = 1; i__ <= i__2; ++i__) {
243  c___ref(i__, j) = *beta * c___ref(i__, j);
244 /* L60: */
245  }
246  }
247  i__2 = *k;
248  for (l = 1; l <= i__2; ++l) {
249  if (b_ref(l, j) != 0.) {
250  temp = *alpha * b_ref(l, j);
251  i__3 = *m;
252  for (i__ = 1; i__ <= i__3; ++i__) {
253  c___ref(i__, j) = c___ref(i__, j) + temp * a_ref(
254  i__, l);
255 /* L70: */
256  }
257  }
258 /* L80: */
259  }
260 /* L90: */
261  }
262  } else {
263 /* Form C := alpha*A'*B + beta*C */
264  i__1 = *n;
265  for (j = 1; j <= i__1; ++j) {
266  i__2 = *m;
267  for (i__ = 1; i__ <= i__2; ++i__) {
268  temp = 0.;
269  i__3 = *k;
270  for (l = 1; l <= i__3; ++l) {
271  temp += a_ref(l, i__) * b_ref(l, j);
272 /* L100: */
273  }
274  if (*beta == 0.) {
275  c___ref(i__, j) = *alpha * temp;
276  } else {
277  c___ref(i__, j) = *alpha * temp + *beta * c___ref(i__,
278  j);
279  }
280 /* L110: */
281  }
282 /* L120: */
283  }
284  }
285  } else {
286  if (nota) {
287 /* Form C := alpha*A*B' + beta*C */
288  i__1 = *n;
289  for (j = 1; j <= i__1; ++j) {
290  if (*beta == 0.) {
291  i__2 = *m;
292  for (i__ = 1; i__ <= i__2; ++i__) {
293  c___ref(i__, j) = 0.;
294 /* L130: */
295  }
296  } else if (*beta != 1.) {
297  i__2 = *m;
298  for (i__ = 1; i__ <= i__2; ++i__) {
299  c___ref(i__, j) = *beta * c___ref(i__, j);
300 /* L140: */
301  }
302  }
303  i__2 = *k;
304  for (l = 1; l <= i__2; ++l) {
305  if (b_ref(j, l) != 0.) {
306  temp = *alpha * b_ref(j, l);
307  i__3 = *m;
308  for (i__ = 1; i__ <= i__3; ++i__) {
309  c___ref(i__, j) = c___ref(i__, j) + temp * a_ref(
310  i__, l);
311 /* L150: */
312  }
313  }
314 /* L160: */
315  }
316 /* L170: */
317  }
318  } else {
319 /* Form C := alpha*A'*B' + beta*C */
320  i__1 = *n;
321  for (j = 1; j <= i__1; ++j) {
322  i__2 = *m;
323  for (i__ = 1; i__ <= i__2; ++i__) {
324  temp = 0.;
325  i__3 = *k;
326  for (l = 1; l <= i__3; ++l) {
327  temp += a_ref(l, i__) * b_ref(j, l);
328 /* L180: */
329  }
330  if (*beta == 0.) {
331  c___ref(i__, j) = *alpha * temp;
332  } else {
333  c___ref(i__, j) = *alpha * temp + *beta * c___ref(i__,
334  j);
335  }
336 /* L190: */
337  }
338 /* L200: */
339  }
340  }
341  }
342  return 0;
343 /* End of DGEMM . */
344 } /* dgemm_ */
345 #undef c___ref
346 #undef b_ref
347 #undef a_ref
348 
349 #endif
#define b_ref(a_1, a_2)
int integer
Definition: template_blas_common.h:40
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:146
#define c___ref(a_1, a_2)
#define a_ref(a_1, a_2)
bool logical
Definition: template_blas_common.h:41
int template_blas_gemm(const char *transa, const char *transb, const integer *m, const integer *n, const integer *k, const Treal *alpha, const Treal *a, const integer *lda, const Treal *b, const integer *ldb, const Treal *beta, Treal *c__, const integer *ldc)
Definition: template_blas_gemm.h:43
logical template_blas_lsame(const char *ca, const char *cb)
Definition: template_blas_common.cc:46