ergo
mat_gblas.h
Go to the documentation of this file.
1 /* Ergo, version 3.7, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2018 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
45 #ifndef MAT_GBLAS_HEADER
46 #define MAT_GBLAS_HEADER
47 #include <ctime>
48 #include "Failure.h"
49 
50 /* We need to include config.h to get USE_LINALG_TEMPLATES and USE_SSE_INTRINSICS flags. */
51 #include "config.h"
52 
53 #include "template_lapack_common.h"
54 
55 #ifdef USE_SSE_INTRINSICS
56 #include "Memory_buffer_thread.h"
57 #include "gemm_sse/gemm_sse.h"
58 #endif
59 
60 /* LEVEL 3 */
61 extern "C" void dgemm_(const char *ta,const char *tb,
62  const int *n, const int *k, const int *l,
63  const double *alpha,const double *A,const int *lda,
64  const double *B, const int *ldb,
65  const double *beta, double *C, const int *ldc);
66 extern "C" void dpptrf_(const char *uplo,const int *n, double* ap, int *info);
67 extern "C" void dspgst_(const int *itype, const char *uplo,const int *n,
68  double* ap,const double *bp,int *info);
69 extern "C" void dtptri_(const char *uplo,const char *diag,const int *n,
70  double* ap,int *info);
71 /* unit triangular means that a value of 1.0 is assumed */
72 /* for the diagonal elements (hence diagonal not stored in packed format) */
73 extern "C" void dtrmm_(const char *side,const char *uplo,const char *transa,
74  const char *diag,const int *m,const int *n,
75  const double *alpha,const double *A,const int *lda,
76  double *B,const int *ldb);
77 extern "C" void dsygv_(const int *itype,const char *jobz,
78  const char *uplo,const int *n,
79  double *A,const int *lda,double *B,const int *ldb,
80  double* w,double* work,const int *lwork,int *info);
81 extern "C" void dggev_(const char *jobbl, const char *jobvr, const int *n,
82  double *A, const int *lda, double *B, const int *ldb,
83  double *alphar, double *alphai, double *beta,
84  double *vl, const int *ldvl,
85  double *vr, const int *ldvr,
86  double *work, const int *lwork, int *info);
87 extern "C" void dpotrf_(const char *uplo, const int *n, double *A,
88  const int *lda, int *info);
89 extern "C" void dtrtri_(const char *uplo,const char *diag,const int *n,
90  double *A, const int *lda, int *info);
91 extern "C" void dsyrk_(const char *uplo, const char *trans, const int *n,
92  const int *k, const double *alpha, const double *A,
93  const int *lda, const double *beta,
94  double *C, const int *ldc);
95 extern "C" void dsymm_(const char *side,const char *uplo,
96  const int *m,const int *n,
97  const double *alpha,const double *A,const int *lda,
98  const double *B,const int *ldb, const double* beta,
99  double *C,const int *ldc);
100 extern "C" void dpocon_(const char *uplo, const int *n, const double *A,
101  const int *lda, const double *anorm, double *rcond,
102  double *work, int *iwork, int *info);
103 extern "C" void dstevx_(const char *jobz, const char *range, const int *n,
104  double *d, double *e, const double *vl,
105  const double *vu, const int *il, const int *iu,
106  const double *abstol, int *m, double *w, double *z,
107  const int *ldz, double *work, int *iwork, int *ifail,
108  int *info);
109 extern "C" void dstevr_(const char *jobz, const char *range, const int *n,
110  double *d, double *e, const double *vl,
111  const double *vu, const int *il, const int *iu,
112  const double *abstol, int *m, double *w, double *z,
113  const int *ldz, int* isuppz, double *work, int* lwork,
114  int *iwork, int* liwork, int *info);
115 extern "C" void dsyev_(const char *jobz, const char *uplo, const int *n,
116  double *a, const int *lda, double *w, double *work,
117  const int *lwork, int *info);
118 
119 /* LEVEL 2 */
120 extern "C" void dgemv_(const char *ta, const int *m, const int *n,
121  const double *alpha, const double *A, const int *lda,
122  const double *x, const int *incx, const double *beta,
123  double *y, const int *incy);
124 extern "C" void dsymv_(const char *uplo, const int *n,
125  const double *alpha, const double *A, const int *lda,
126  const double *x, const int *incx, const double *beta,
127  double *y, const int *incy);
128 extern "C" void dtrmv_(const char *uplo, const char *trans, const char *diag,
129  const int *n, const double *A, const int *lda,
130  double *x, const int *incx);
131 /* LEVEL 1 */
132 extern "C" void dscal_(const int* n,const double* da, double* dx,
133  const int* incx);
134 extern "C" double ddot_(const int* n, const double* dx, const int* incx,
135  const double* dy, const int* incy);
136 extern "C" void daxpy_(const int* n, const double* da, const double* dx,
137  const int* incx, double* dy,const int* incy);
138 
139 /* Single precision */
140 /* LEVEL 3 */
141 extern "C" void sgemm_(const char *ta,const char *tb,
142  const int *n, const int *k, const int *l,
143  const float *alpha,const float *A,const int *lda,
144  const float *B, const int *ldb,
145  const float *beta, float *C, const int *ldc);
146 extern "C" void spptrf_(const char *uplo,const int *n, float* ap, int *info);
147 extern "C" void sspgst_(const int *itype, const char *uplo,const int *n,
148  float* ap,const float *bp,int *info);
149 extern "C" void stptri_(const char *uplo,const char *diag,const int *n,
150  float* ap,int *info);
151 /* unit triangular means that a value of 1.0 is assumed */
152 /* for the diagonal elements (hence diagonal not stored in packed format) */
153 extern "C" void strmm_(const char *side,const char *uplo,const char *transa,
154  const char *diag,const int *m,const int *n,
155  const float *alpha,const float *A,const int *lda,
156  float *B,const int *ldb);
157 extern "C" void ssygv_(const int *itype,const char *jobz,
158  const char *uplo,const int *n,
159  float *A,const int *lda,float *B,const int *ldb,
160  float* w,float* work,const int *lwork,int *info);
161 extern "C" void sggev_(const char *jobbl, const char *jobvr, const int *n,
162  float *A, const int *lda, float *B, const int *ldb,
163  float *alphar, float *alphai, float *beta,
164  float *vl, const int *ldvl,
165  float *vr, const int *ldvr,
166  float *work, const int *lwork, int *info);
167 extern "C" void spotrf_(const char *uplo, const int *n, float *A,
168  const int *lda, int *info);
169 extern "C" void strtri_(const char *uplo,const char *diag,const int *n,
170  float *A, const int *lda, int *info);
171 extern "C" void ssyrk_(const char *uplo, const char *trans, const int *n,
172  const int *k, const float *alpha, const float *A,
173  const int *lda, const float *beta,
174  float *C, const int *ldc);
175 extern "C" void ssymm_(const char *side,const char *uplo,
176  const int *m,const int *n,
177  const float *alpha,const float *A,const int *lda,
178  const float *B,const int *ldb, const float* beta,
179  float *C,const int *ldc);
180 extern "C" void spocon_(const char *uplo, const int *n, const float *A,
181  const int *lda, const float *anorm, float *rcond,
182  float *work, int *iwork, int *info);
183 extern "C" void sstevx_(const char *jobz, const char *range, const int *n,
184  float *d, float *e, const float *vl,
185  const float *vu, const int *il, const int *iu,
186  const float *abstol, int *m, float *w, float *z,
187  const int *ldz, float *work, int *iwork, int *ifail,
188  int *info);
189 extern "C" void sstevr_(const char *jobz, const char *range, const int *n,
190  float *d, float *e, const float *vl,
191  const float *vu, const int *il, const int *iu,
192  const float *abstol, int *m, float *w, float *z,
193  const int *ldz, int* isuppz, float *work, int* lwork,
194  int *iwork, int* liwork, int *info);
195 extern "C" void ssyev_(const char *jobz, const char *uplo, const int *n,
196  float *a, const int *lda, float *w, float *work,
197  const int *lwork, int *info);
198 
199 /* LEVEL 2 */
200 extern "C" void sgemv_(const char *ta, const int *m, const int *n,
201  const float *alpha, const float *A, const int *lda,
202  const float *x, const int *incx, const float *beta,
203  float *y, const int *incy);
204 extern "C" void ssymv_(const char *uplo, const int *n,
205  const float *alpha, const float *A, const int *lda,
206  const float *x, const int *incx, const float *beta,
207  float *y, const int *incy);
208 extern "C" void strmv_(const char *uplo, const char *trans, const char *diag,
209  const int *n, const float *A, const int *lda,
210  float *x, const int *incx);
211 /* LEVEL 1 */
212 extern "C" void sscal_(const int* n,const float* da, float* dx,
213  const int* incx);
214 #if 0
215 // sdot_ is unreliable because of varying return type in different
216 // implementations. We therefore always use template dot for single precision
217 extern "C" double sdot_(const int* n, const float* dx, const int* incx,
218  const float* dy, const int* incy);
219 #endif
220 extern "C" void saxpy_(const int* n, const float* da, const float* dx,
221  const int* incx, float* dy,const int* incy);
222 
223 namespace mat
224 {
225  struct Gblas {
226  static float time;
227  static bool timekeeping;
228  };
229 
230  /*************** Default version throws exception */
231  template<class T>
232  inline static void gemm(const char *ta,const char *tb,
233  const int *n, const int *k, const int *l,
234  const T *alpha,const T *A,const int *lda,
235  const T *B, const int *ldb,
236  const T *beta,T *C, const int *ldc) {
237 #ifdef USE_SSE_INTRINSICS
238  if (*ta == 'N' && *tb == 'N' && *n == 32 && *k == 32 && *l == 32 && *alpha == 1.0 && *beta == 1) {
239  static T * A_packed;
240  static T * B_packed;
241  static T * C_packed;
242  int pack_max_size = 10000;
243  if ( (pack_max_size*sizeof(T))%16 )
244  throw std::runtime_error("In gblas gemm: requested buffer size not multiple of 16 bytes");
245  static T * buffer;
246  Memory_buffer_thread::instance().get_buffer(pack_max_size*3, buffer);
247  A_packed = buffer;
248  B_packed = buffer + pack_max_size;
249  C_packed = buffer + 2*pack_max_size;
250  gemm_sse<T>(A,B,C,32,32,32,
251  A_packed, B_packed, C_packed,
252  pack_max_size, pack_max_size, pack_max_size);
253  }
254  else
255 #endif
256  template_blas_gemm(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
257  }
258 
259 
260  /* Computes the Cholesky factorization of a symmetric *
261  * positive definite matrix in packed storage. */
262  template<class T>
263  inline static void pptrf(const char *uplo,const int *n, T* ap, int *info) {
264  template_lapack_pptrf(uplo,n,ap,info);
265  }
266 
267  template<class T>
268  inline static void spgst(const int *itype, const char *uplo,const int *n,
269  T* ap,const T *bp,int *info) {
270  template_lapack_spgst(itype,uplo,n,ap,bp,info);
271  }
272 
273  /* Computes the inverse of a triangular matrix in packed storage. */
274  template<class T>
275  inline static void tptri(const char *uplo,const char *diag,const int *n,
276  T* ap,int *info) {
277  template_lapack_tptri(uplo,diag,n,ap,info);
278  }
279 
280  template<class T>
281  inline static void trmm(const char *side,const char *uplo,
282  const char *transa, const char *diag,
283  const int *m,const int *n,
284  const T *alpha,const T *A,const int *lda,
285  T *B,const int *ldb) {
286  template_blas_trmm(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
287  }
288 
289  /* Computes all eigenvalues and the eigenvectors of a generalized *
290  * symmetric-definite generalized eigenproblem, *
291  * Ax= lambda Bx, ABx= lambda x, or BAx= lambda x. */
292  template<class T>
293  inline static void sygv(const int *itype,const char *jobz,
294  const char *uplo,const int *n,
295  T *A,const int *lda,T *B,const int *ldb,
296  T* w,T* work,const int *lwork,int *info) {
297  template_lapack_sygv(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
298  }
299 
300  template<class T>
301  inline static void ggev(const char *jobbl, const char *jobvr,
302  const int *n, T *A, const int *lda,
303  T *B, const int *ldb, T *alphar,
304  T *alphai, T *beta, T *vl,
305  const int *ldvl, T *vr, const int *ldvr,
306  T *work, const int *lwork, int *info) {
307  template_lapack_ggev(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
308  ldvl, vr, ldvr, work, lwork, info);
309  }
310 
311  /* Computes the Cholesky factorization of a symmetric *
312  * positive definite matrix in packed storage. */
313  template<class T>
314  inline static void potrf(const char *uplo, const int *n, T *A,
315  const int *lda, int *info) {
316  template_lapack_potrf(uplo, n, A, lda, info);
317  }
318 
319  /* Computes the inverse of a triangular matrix. */
320  template<class T>
321  inline static void trtri(const char *uplo,const char *diag,const int *n,
322  T *A, const int *lda, int *info) {
323  // Create copies of strings because they cannot be const inside trtri.
324  char uploCopy[2];
325  char diagCopy[2];
326  uploCopy[0] = uplo[0];
327  uploCopy[1] = '\0';
328  diagCopy[0] = diag[0];
329  diagCopy[1] = '\0';
330  template_lapack_trtri(uploCopy, diagCopy, n, A, lda, info);
331  }
332 
333  template<class T>
334  inline static void syrk(const char *uplo, const char *trans, const int *n,
335  const int *k, const T *alpha, const T *A,
336  const int *lda, const T *beta,
337  T *C, const int *ldc) {
338  template_blas_syrk(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
339  }
340 
341  template<class T>
342  inline static void symm(const char *side,const char *uplo,
343  const int *m,const int *n,
344  const T *alpha,const T *A,const int *lda,
345  const T *B,const int *ldb, const T* beta,
346  T *C,const int *ldc) {
347  template_blas_symm(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
348  }
349 
350  template<class T>
351  inline static void pocon(const char *uplo, const int *n, const T *A,
352  const int *lda, const T *anorm, T *rcond,
353  T *work, int *iwork, int *info) {
354  template_lapack_pocon(uplo, n, A, lda, anorm, rcond, work, iwork, info);
355  }
356 
357  template<class T>
358  inline static void stevx(const char *jobz, const char *range,
359  const int *n, T *d, T *e, const T *vl,
360  const T *vu, const int *il, const int *iu,
361  const T *abstol, int *m, T *w, T *z,
362  const int *ldz, T *work, int *iwork, int *ifail,
363  int *info) {
364  template_lapack_stevx(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
365  work, iwork, ifail, info);
366  }
367 
368  template<class T>
369  inline static void stevr(const char *jobz, const char *range, const int *n,
370  T *d, T *e, const T *vl,
371  const T *vu, const int *il, const int *iu,
372  const T *abstol, int *m, T *w, T *z,
373  const int *ldz, int* isuppz, T *work, int* lwork,
374  int *iwork, int* liwork, int *info) {
375  template_lapack_stevr(jobz, range, n, d, e, vl, vu, il, iu, abstol,
376  m, w, z, ldz, isuppz,
377  work, lwork, iwork, liwork, info);
378  }
379 
380 
381  template<class T>
382  inline static void syev(const char *jobz, const char *uplo, const int *n,
383  T *a, const int *lda, T *w, T *work,
384  const int *lwork, int *info) {
385  template_lapack_syev(jobz, uplo, n, a, lda, w, work, lwork, info);
386  }
387 
388 
389  /* LEVEL 2 */
390  template<class T>
391  inline static void gemv(const char *ta, const int *m, const int *n,
392  const T *alpha, const T *A,
393  const int *lda,
394  const T *x, const int *incx,
395  const T *beta, T *y, const int *incy) {
396  template_blas_gemv(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
397  }
398 
399  template<class T>
400  inline static void symv(const char *uplo, const int *n,
401  const T *alpha, const T *A,
402  const int *lda, const T *x,
403  const int *incx, const T *beta,
404  T *y, const int *incy) {
405  template_blas_symv(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
406  }
407 
408  template<class T>
409  inline static void trmv(const char *uplo, const char *trans,
410  const char *diag, const int *n,
411  const T *A, const int *lda,
412  T *x, const int *incx) {
413  template_blas_trmv(uplo, trans, diag, n, A, lda, x, incx);
414  }
415 
416 
417  /* LEVEL 1 */
418  template<class T>
419  inline static void scal(const int* n,const T* da, T* dx,
420  const int* incx) {
421  template_blas_scal(n, da, dx, incx);
422  }
423 
424  template<class T>
425  inline static T dot(const int* n, const T* dx, const int* incx,
426  const T* dy, const int* incy) {
427  return template_blas_dot(n, dx, incx, dy, incy);
428  }
429 
430  template<class T>
431  inline static void axpy(const int* n, const T* da, const T* dx,
432  const int* incx, T* dy,const int* incy) {
433  template_blas_axpy(n, da, dx, incx, dy, incy);
434  }
435 
436 
437 
438 
439  /* Below follows specializations for double, single, etc.
440  These specializations are not needed if template_blas and template_lapack are used,
441  so in that case we skip this entire section. */
442 #ifndef USE_LINALG_TEMPLATES
443 
444 
445  /*************** Double specialization */
446  template<>
447  inline void gemm<double>(const char *ta,const char *tb,
448  const int *n, const int *k, const int *l,
449  const double *alpha,
450  const double *A,const int *lda,
451  const double *B, const int *ldb,
452  const double *beta,
453  double *C, const int *ldc) {
454  if (Gblas::timekeeping) {
455  clock_t start = clock();
456  dgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
457  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
458  }
459  else {
460  dgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
461  }
462  }
463 
464  template<>
465  inline void pptrf<double>(const char *uplo,const int *n,
466  double* ap, int *info) {
467  if (Gblas::timekeeping) {
468  clock_t start = clock();
469  dpptrf_(uplo,n,ap,info);
470  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
471  }
472  else {
473  dpptrf_(uplo,n,ap,info);
474  }
475  }
476 
477  template<>
478  inline void spgst<double>(const int *itype, const char *uplo,
479  const int *n,
480  double* ap,const double *bp,int *info) {
481  if (Gblas::timekeeping) {
482  clock_t start = clock();
483  dspgst_(itype,uplo,n,ap,bp,info);
484  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
485  }
486  else {
487  dspgst_(itype,uplo,n,ap,bp,info);
488  }
489  }
490 
491  template<>
492  inline void tptri<double>(const char *uplo,const char *diag,const int *n,
493  double* ap,int *info) {
494  if (Gblas::timekeeping) {
495  clock_t start = clock();
496  dtptri_(uplo,diag,n,ap,info);
497  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
498  }
499  else {
500  dtptri_(uplo,diag,n,ap,info);
501  }
502  }
503 
504  template<>
505  inline void trmm<double>(const char *side,const char *uplo,
506  const char *transa,
507  const char *diag,const int *m,const int *n,
508  const double *alpha,
509  const double *A,const int *lda,
510  double *B,const int *ldb) {
511  if (Gblas::timekeeping) {
512  clock_t start = clock();
513  dtrmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
514  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
515  }
516  else {
517  dtrmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
518  }
519  }
520 
521  template<>
522  inline void sygv<double>(const int *itype,const char *jobz,
523  const char *uplo,const int *n,
524  double *A,const int *lda,
525  double *B,const int *ldb,
526  double* w,double* work,
527  const int *lwork,int *info) {
528  if (Gblas::timekeeping) {
529  clock_t start = clock();
530  dsygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
531  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
532  }
533  else {
534  dsygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
535  }
536  }
537 
538  template<>
539  inline void ggev<double>(const char *jobbl, const char *jobvr,
540  const int *n, double *A, const int *lda,
541  double *B, const int *ldb, double *alphar,
542  double *alphai, double *beta, double *vl,
543  const int *ldvl, double *vr, const int *ldvr,
544  double *work, const int *lwork, int *info) {
545  if (Gblas::timekeeping) {
546  clock_t start = clock();
547  dggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
548  ldvl, vr, ldvr, work, lwork, info);
549  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
550  }
551  else {
552  dggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
553  ldvl, vr, ldvr, work, lwork, info);
554  }
555  }
556 
557 
558  template<>
559  inline void potrf<double>(const char *uplo, const int *n, double *A,
560  const int *lda, int *info) {
561  if (Gblas::timekeeping) {
562  clock_t start = clock();
563  dpotrf_(uplo, n, A, lda, info);
564  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
565  }
566  else {
567  dpotrf_(uplo, n, A, lda, info);
568  }
569  }
570 
571  template<>
572  inline void trtri<double>(const char *uplo,const char *diag,const int *n,
573  double *A, const int *lda, int *info) {
574  if (Gblas::timekeeping) {
575  clock_t start = clock();
576  dtrtri_(uplo, diag, n, A, lda, info);
577  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
578  }
579  else {
580  dtrtri_(uplo, diag, n, A, lda, info);
581  }
582  }
583 
584  template<>
585  inline void syrk<double>(const char *uplo, const char *trans,
586  const int *n, const int *k, const double *alpha,
587  const double *A, const int *lda,
588  const double *beta, double *C, const int *ldc) {
589  if (Gblas::timekeeping) {
590  clock_t start = clock();
591  dsyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
592  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
593  }
594  else {
595  dsyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
596  }
597  }
598 
599  template<>
600  inline void symm<double>(const char *side,const char *uplo,
601  const int *m,const int *n, const double *alpha,
602  const double *A,const int *lda,
603  const double *B,const int *ldb,
604  const double* beta,
605  double *C,const int *ldc) {
606  if (Gblas::timekeeping) {
607  clock_t start = clock();
608  dsymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
609  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
610  }
611  else {
612  dsymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
613  }
614  }
615 
616  template<>
617  inline void pocon<double>(const char *uplo, const int *n,
618  const double *A, const int *lda,
619  const double *anorm, double *rcond,
620  double *work, int *iwork, int *info) {
621  if (Gblas::timekeeping) {
622  clock_t start = clock();
623  dpocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
624  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
625  }
626  else {
627  dpocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
628  }
629  }
630 
631  template<>
632  inline void stevx<double>(const char *jobz, const char *range,
633  const int *n, double *d, double *e,
634  const double *vl,
635  const double *vu, const int *il, const int *iu,
636  const double *abstol, int *m, double *w,
637  double *z,
638  const int *ldz, double *work, int *iwork,
639  int *ifail, int *info) {
640  if (Gblas::timekeeping) {
641  clock_t start = clock();
642  dstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
643  work, iwork, ifail, info);
644  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
645  }
646  else {
647  dstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
648  work, iwork, ifail, info);
649  }
650  }
651 
652  template<>
653  inline void stevr<double>(const char *jobz, const char *range,
654  const int *n, double *d, double *e,
655  const double *vl, const double *vu,
656  const int *il, const int *iu,
657  const double *abstol,
658  int *m, double *w,
659  double *z, const int *ldz, int* isuppz,
660  double *work, int* lwork,
661  int *iwork, int* liwork, int *info) {
662  if (Gblas::timekeeping) {
663  clock_t start = clock();
664  dstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol,
665  m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
666  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
667  }
668  else {
669  dstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol,
670  m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
671  }
672  }
673 
674 
675 
676  template<>
677  inline void syev<double>(const char *jobz, const char *uplo, const int *n,
678  double *a, const int *lda, double *w,
679  double *work, const int *lwork, int *info) {
680  if (Gblas::timekeeping) {
681  clock_t start = clock();
682  dsyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
683  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
684  }
685  else {
686  dsyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
687  }
688  }
689 
690 
691  /* LEVEL 2 */
692  template<>
693  inline void gemv<double>(const char *ta, const int *m, const int *n,
694  const double *alpha, const double *A,
695  const int *lda,
696  const double *x, const int *incx,
697  const double *beta, double *y, const int *incy) {
698  if (Gblas::timekeeping) {
699  clock_t start = clock();
700  dgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
701  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
702  }
703  else {
704  dgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
705  }
706  }
707 
708  template<>
709  inline void symv<double>(const char *uplo, const int *n,
710  const double *alpha, const double *A,
711  const int *lda, const double *x,
712  const int *incx, const double *beta,
713  double *y, const int *incy) {
714  if (Gblas::timekeeping) {
715  clock_t start = clock();
716  dsymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
717  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
718  }
719  else {
720  dsymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
721  }
722  }
723 
724  template<>
725  inline void trmv<double>(const char *uplo, const char *trans,
726  const char *diag, const int *n,
727  const double *A, const int *lda,
728  double *x, const int *incx) {
729  if (Gblas::timekeeping) {
730  clock_t start = clock();
731  dtrmv_(uplo, trans, diag, n, A, lda, x, incx);
732  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
733  }
734  else {
735  dtrmv_(uplo, trans, diag, n, A, lda, x, incx);
736  }
737  }
738 
739 
740  /* LEVEL 1 */
741  template<>
742  inline void scal<double>(const int* n,const double* da, double* dx,
743  const int* incx) {
744  if (Gblas::timekeeping) {
745  clock_t start = clock();
746  dscal_(n, da, dx, incx);
747  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
748  }
749  else {
750  dscal_(n, da, dx, incx);
751  }
752  }
753 
754  template<>
755  inline double dot<double>(const int* n, const double* dx, const int* incx,
756  const double* dy, const int* incy) {
757  double tmp = 0;
758  if (Gblas::timekeeping) {
759  clock_t start = clock();
760  tmp = ddot_(n, dx, incx, dy, incy);
761  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
762  }
763  else {
764  tmp = ddot_(n, dx, incx, dy, incy);
765  }
766  return tmp;
767  }
768 
769  template<>
770  inline void axpy<double>(const int* n, const double* da, const double* dx,
771  const int* incx, double* dy,const int* incy) {
772  if (Gblas::timekeeping) {
773  clock_t start = clock();
774  daxpy_(n, da, dx, incx, dy, incy);
775  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
776  }
777  else {
778  daxpy_(n, da, dx, incx, dy, incy);
779  }
780  }
781 
782 
783  /*************** Single specialization */
784  template<>
785  inline void gemm<float>(const char *ta,const char *tb,
786  const int *n, const int *k, const int *l,
787  const float *alpha,
788  const float *A,const int *lda,
789  const float *B, const int *ldb,
790  const float *beta,
791  float *C, const int *ldc) {
792  if (Gblas::timekeeping) {
793  clock_t start = clock();
794  sgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
795  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
796  }
797  else {
798  sgemm_(ta,tb,n,k,l,alpha,A,lda,B,ldb,beta,C,ldc);
799  }
800  }
801 
802  template<>
803  inline void pptrf<float>(const char *uplo,const int *n,
804  float* ap, int *info) {
805  if (Gblas::timekeeping) {
806  clock_t start = clock();
807  spptrf_(uplo,n,ap,info);
808  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
809  }
810  else {
811  spptrf_(uplo,n,ap,info);
812  }
813  }
814 
815  template<>
816  inline void spgst<float>(const int *itype, const char *uplo,
817  const int *n,
818  float* ap,const float *bp,int *info) {
819  if (Gblas::timekeeping) {
820  clock_t start = clock();
821  sspgst_(itype,uplo,n,ap,bp,info);
822  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
823  }
824  else {
825  sspgst_(itype,uplo,n,ap,bp,info);
826  }
827  }
828 
829  template<>
830  inline void tptri<float>(const char *uplo,const char *diag,
831  const int *n,
832  float* ap,int *info) {
833  if (Gblas::timekeeping) {
834  clock_t start = clock();
835  stptri_(uplo,diag,n,ap,info);
836  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
837  }
838  else {
839  stptri_(uplo,diag,n,ap,info);
840  }
841  }
842 
843  template<>
844  inline void trmm<float>(const char *side,const char *uplo,
845  const char *transa,
846  const char *diag,const int *m,const int *n,
847  const float *alpha,
848  const float *A,const int *lda,
849  float *B,const int *ldb) {
850  if (Gblas::timekeeping) {
851  clock_t start = clock();
852  strmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
853  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
854  }
855  else {
856  strmm_(side,uplo,transa,diag,m,n,alpha,A,lda,B,ldb);
857  }
858  }
859 
860  template<>
861  inline void sygv<float>(const int *itype,const char *jobz,
862  const char *uplo,const int *n,
863  float *A,const int *lda,
864  float *B,const int *ldb,
865  float* w,float* work,
866  const int *lwork,int *info) {
867  if (Gblas::timekeeping) {
868  clock_t start = clock();
869  ssygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
870  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
871  }
872  else {
873  ssygv_(itype,jobz,uplo,n,A,lda,B,ldb,w,work,lwork,info);
874  }
875  }
876 
877  template<>
878  inline void ggev<float>(const char *jobbl, const char *jobvr,
879  const int *n, float *A, const int *lda,
880  float *B, const int *ldb, float *alphar,
881  float *alphai, float *beta, float *vl,
882  const int *ldvl, float *vr, const int *ldvr,
883  float *work, const int *lwork, int *info) {
884  if (Gblas::timekeeping) {
885  clock_t start = clock();
886  sggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
887  ldvl, vr, ldvr, work, lwork, info);
888  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
889  }
890  else {
891  sggev_(jobbl, jobvr, n, A, lda, B, ldb, alphar, alphai, beta, vl,
892  ldvl, vr, ldvr, work, lwork, info);
893  }
894  }
895 
896 
897  template<>
898  inline void potrf<float>(const char *uplo, const int *n, float *A,
899  const int *lda, int *info) {
900  if (Gblas::timekeeping) {
901  clock_t start = clock();
902  spotrf_(uplo, n, A, lda, info);
903  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
904  }
905  else {
906  spotrf_(uplo, n, A, lda, info);
907  }
908  }
909 
910  template<>
911  inline void trtri<float>(const char *uplo,const char *diag,const int *n,
912  float *A, const int *lda, int *info) {
913  if (Gblas::timekeeping) {
914  clock_t start = clock();
915  strtri_(uplo, diag, n, A, lda, info);
916  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
917  }
918  else {
919  strtri_(uplo, diag, n, A, lda, info);
920  }
921  }
922 
923  template<>
924  inline void syrk<float>(const char *uplo, const char *trans,
925  const int *n, const int *k, const float *alpha,
926  const float *A, const int *lda,
927  const float *beta, float *C, const int *ldc) {
928  if (Gblas::timekeeping) {
929  clock_t start = clock();
930  ssyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
931  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
932  }
933  else {
934  ssyrk_(uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
935  }
936  }
937 
938  template<>
939  inline void symm<float>(const char *side,const char *uplo,
940  const int *m,const int *n, const float *alpha,
941  const float *A,const int *lda,
942  const float *B,const int *ldb,
943  const float* beta,
944  float *C,const int *ldc) {
945  if (Gblas::timekeeping) {
946  clock_t start = clock();
947  ssymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
948  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
949  }
950  else {
951  ssymm_(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc);
952  }
953  }
954 
955  template<>
956  inline void pocon<float>(const char *uplo, const int *n,
957  const float *A, const int *lda,
958  const float *anorm, float *rcond,
959  float *work, int *iwork, int *info) {
960  if (Gblas::timekeeping) {
961  clock_t start = clock();
962  spocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
963  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
964  }
965  else {
966  spocon_(uplo, n, A, lda, anorm, rcond, work, iwork, info);
967  }
968  }
969 
970  template<>
971  inline void stevx<float>(const char *jobz, const char *range,
972  const int *n, float *d, float *e,
973  const float *vl,
974  const float *vu, const int *il, const int *iu,
975  const float *abstol, int *m, float *w,
976  float *z,
977  const int *ldz, float *work, int *iwork,
978  int *ifail, int *info) {
979  if (Gblas::timekeeping) {
980  clock_t start = clock();
981  sstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
982  work, iwork, ifail, info);
983  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
984  }
985  else {
986  sstevx_(jobz, range, n, d, e, vl, vu, il, iu, abstol, m, w, z, ldz,
987  work, iwork, ifail, info);
988  }
989  }
990 
991  template<>
992  inline void stevr<float>(const char *jobz, const char *range,
993  const int *n, float *d, float *e,
994  const float *vl, const float *vu,
995  const int *il, const int *iu,
996  const float *abstol,
997  int *m, float *w,
998  float *z, const int *ldz, int* isuppz,
999  float *work, int* lwork,
1000  int *iwork, int* liwork, int *info) {
1001  if (Gblas::timekeeping) {
1002  clock_t start = clock();
1003  sstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol,
1004  m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
1005  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1006  }
1007  else {
1008  sstevr_(jobz, range, n, d, e, vl, vu, il, iu, abstol,
1009  m, w, z, ldz, isuppz, work, lwork, iwork, liwork, info);
1010  }
1011  }
1012 
1013  template<>
1014  inline void syev<float>(const char *jobz, const char *uplo, const int *n,
1015  float *a, const int *lda, float *w,
1016  float *work, const int *lwork, int *info) {
1017  if (Gblas::timekeeping) {
1018  clock_t start = clock();
1019  ssyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
1020  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1021  }
1022  else {
1023  ssyev_(jobz, uplo, n, a, lda, w, work, lwork, info);
1024  }
1025  }
1026 
1027 
1028  /* LEVEL 2 */
1029  template<>
1030  inline void gemv<float>(const char *ta, const int *m, const int *n,
1031  const float *alpha, const float *A,
1032  const int *lda,
1033  const float *x, const int *incx,
1034  const float *beta, float *y, const int *incy) {
1035  if (Gblas::timekeeping) {
1036  clock_t start = clock();
1037  sgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
1038  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1039  }
1040  else {
1041  sgemv_(ta, m, n, alpha, A, lda, x, incx, beta, y, incy);
1042  }
1043  }
1044 
1045  template<>
1046  inline void symv<float>(const char *uplo, const int *n,
1047  const float *alpha, const float *A,
1048  const int *lda, const float *x,
1049  const int *incx, const float *beta,
1050  float *y, const int *incy) {
1051  if (Gblas::timekeeping) {
1052  clock_t start = clock();
1053  ssymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
1054  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1055  }
1056  else {
1057  ssymv_(uplo, n, alpha, A, lda, x, incx, beta, y, incy);
1058  }
1059  }
1060 
1061  template<>
1062  inline void trmv<float>(const char *uplo, const char *trans,
1063  const char *diag, const int *n,
1064  const float *A, const int *lda,
1065  float *x, const int *incx) {
1066  if (Gblas::timekeeping) {
1067  clock_t start = clock();
1068  strmv_(uplo, trans, diag, n, A, lda, x, incx);
1069  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1070  }
1071  else {
1072  strmv_(uplo, trans, diag, n, A, lda, x, incx);
1073  }
1074  }
1075 
1076  /* LEVEL 1 */
1077  template<>
1078  inline void scal<float>(const int* n,const float* da, float* dx,
1079  const int* incx) {
1080  if (Gblas::timekeeping) {
1081  clock_t start = clock();
1082  sscal_(n, da, dx, incx);
1083  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1084  }
1085  else {
1086  sscal_(n, da, dx, incx);
1087  }
1088  }
1089 #if 0
1090  // Sdot has different return type in different BLAS implementations
1091  // Therefore the specialization for single precision is removed
1092  template<>
1093  inline float dot<float>(const int* n, const float* dx, const int* incx,
1094  const float* dy, const int* incy) {
1095  float tmp;
1096  if (Gblas::timekeeping) {
1097  clock_t start = clock();
1098  sdot_(n, dx, incx, dy, incy);
1099  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1100  }
1101  else {
1102  sdot_(n, dx, incx, dy, incy);
1103  }
1104  return tmp;
1105  }
1106 #endif
1107 
1108  template<>
1109  inline void axpy<float>(const int* n, const float* da, const float* dx,
1110  const int* incx, float* dy,const int* incy) {
1111  if (Gblas::timekeeping) {
1112  clock_t start = clock();
1113  saxpy_(n, da, dx, incx, dy, incy);
1114  Gblas::time += ((float)(clock() - start)) / (CLOCKS_PER_SEC);
1115  }
1116  else {
1117  saxpy_(n, da, dx, incx, dy, incy);
1118  }
1119  }
1120 
1121  /* END OF SPECIALIZATIONS */
1122 #endif
1123 
1124 
1125 
1126 
1127 
1128 
1129 
1130 
1131 
1132  /* Other */
1133 
1134  template<class Treal>
1135  static void fulltopacked(const Treal* full, Treal* packed, const int size){
1136  int pind=0;
1137  for (int col=0;col<size;col++)
1138  {
1139  for(int row=0;row<=col;row++)
1140  {
1141  packed[pind]=full[col*size+row];
1142  pind++;
1143  }
1144  }
1145  }
1146 
1147  template<class Treal>
1148  static void packedtofull(const Treal* packed, Treal* full, const int size){
1149  int psize=(size+1)*size/2;
1150  int col=0;
1151  int row=0;
1152  for(int pind=0;pind<psize;pind++)
1153  {
1154  if (col==row)
1155  {
1156  full[col*size+row]=packed[pind];
1157  col++;
1158  row=0;
1159  }
1160  else
1161  {
1162  full[col*size+row]=packed[pind];
1163  full[row*size+col]=packed[pind];
1164  row++;
1165  }
1166  }
1167  }
1168 
1169  template<class Treal>
1170  static void tripackedtofull(const Treal* packed,Treal* full,
1171  const int size) {
1172  int psize=(size+1)*size/2;
1173  int col=0;
1174  int row=0;
1175  for(int pind=0;pind<psize;pind++)
1176  {
1177  if (col==row)
1178  {
1179  full[col*size+row]=packed[pind];
1180  col++;
1181  row=0;
1182  }
1183  else
1184  {
1185  full[col*size+row]=packed[pind];
1186  full[row*size+col]=0;
1187  row++;
1188  }
1189  }
1190  }
1191 
1192  template<class Treal>
1193  static void trifulltofull(Treal* trifull, const int size) {
1194  for(int col = 0; col < size - 1; col++)
1195  for(int row = col + 1; row < size; row++)
1196  trifull[col * size + row] = 0;
1197  }
1198 
1199 
1200 } /* namespace mat */
1201 
1202 #endif /* GBLAS */
void spptrf_(const char *uplo, const int *n, float *ap, int *info)
void potrf< float >(const char *uplo, const int *n, float *A, const int *lda, int *info)
Definition: mat_gblas.h:898
void trmm< float >(const char *side, const char *uplo, const char *transa, const char *diag, const int *m, const int *n, const float *alpha, const float *A, const int *lda, float *B, const int *ldb)
Definition: mat_gblas.h:844
#define A
int template_blas_scal(const integer *n, const Treal *da, Treal *dx, const integer *incx)
Definition: template_blas_scal.h:43
void pptrf< float >(const char *uplo, const int *n, float *ap, int *info)
Definition: mat_gblas.h:803
void gemv< float >(const char *ta, const int *m, const int *n, const float *alpha, const float *A, const int *lda, const float *x, const int *incx, const float *beta, float *y, const int *incy)
Definition: mat_gblas.h:1030
static void trifulltofull(Treal *trifull, const int size)
Definition: mat_gblas.h:1193
void dpocon_(const char *uplo, const int *n, const double *A, const int *lda, const double *anorm, double *rcond, double *work, int *iwork, int *info)
void sgemv_(const char *ta, const int *m, const int *n, const float *alpha, const float *A, const int *lda, const float *x, const int *incx, const float *beta, float *y, const int *incy)
void spgst< float >(const int *itype, const char *uplo, const int *n, float *ap, const float *bp, int *info)
Definition: mat_gblas.h:816
void dtrmm_(const char *side, const char *uplo, const char *transa, const char *diag, const int *m, const int *n, const double *alpha, const double *A, const int *lda, double *B, const int *ldb)
Definition: mat_gblas.h:225
static bool timekeeping
Definition: mat_gblas.h:227
void symm< float >(const char *side, const char *uplo, const int *m, const int *n, const float *alpha, const float *A, const int *lda, const float *B, const int *ldb, const float *beta, float *C, const int *ldc)
Definition: mat_gblas.h:939
void stevx< float >(const char *jobz, const char *range, const int *n, float *d, float *e, const float *vl, const float *vu, const int *il, const int *iu, const float *abstol, int *m, float *w, float *z, const int *ldz, float *work, int *iwork, int *ifail, int *info)
Definition: mat_gblas.h:971
void dsygv_(const int *itype, const char *jobz, const char *uplo, const int *n, double *A, const int *lda, double *B, const int *ldb, double *w, double *work, const int *lwork, int *info)
void dtrmv_(const char *uplo, const char *trans, const char *diag, const int *n, const double *A, const int *lda, double *x, const int *incx)
void pptrf< double >(const char *uplo, const int *n, double *ap, int *info)
Definition: mat_gblas.h:465
void gemv< double >(const char *ta, const int *m, const int *n, const double *alpha, const double *A, const int *lda, const double *x, const int *incx, const double *beta, double *y, const int *incy)
Definition: mat_gblas.h:693
double dot< double >(const int *n, const double *dx, const int *incx, const double *dy, const int *incy)
Definition: mat_gblas.h:755
void axpy< float >(const int *n, const float *da, const float *dx, const int *incx, float *dy, const int *incy)
Definition: mat_gblas.h:1109
int template_lapack_potrf(const char *uplo, const integer *n, Treal *a, const integer *lda, integer *info)
Definition: template_lapack_potrf.h:43
void dgemm_(const char *ta, const char *tb, const int *n, const int *k, const int *l, const double *alpha, const double *A, const int *lda, const double *B, const int *ldb, const double *beta, double *C, const int *ldc)
int template_lapack_pocon(const char *uplo, const integer *n, const Treal *a, const integer *lda, const Treal *anorm, Treal *rcond, Treal *work, integer *iwork, integer *info)
Definition: template_lapack_pocon.h:42
void sygv< double >(const int *itype, const char *jobz, const char *uplo, const int *n, double *A, const int *lda, double *B, const int *ldb, double *w, double *work, const int *lwork, int *info)
Definition: mat_gblas.h:522
void dggev_(const char *jobbl, const char *jobvr, const int *n, double *A, const int *lda, double *B, const int *ldb, double *alphar, double *alphai, double *beta, double *vl, const int *ldvl, double *vr, const int *ldvr, double *work, const int *lwork, int *info)
void dsymv_(const char *uplo, const int *n, const double *alpha, const double *A, const int *lda, const double *x, const int *incx, const double *beta, double *y, const int *incy)
void pocon< float >(const char *uplo, const int *n, const float *A, const int *lda, const float *anorm, float *rcond, float *work, int *iwork, int *info)
Definition: mat_gblas.h:956
int template_blas_trmm(const char *side, const char *uplo, const char *transa, const char *diag, const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *lda, Treal *b, const integer *ldb)
Definition: template_blas_trmm.h:43
void saxpy_(const int *n, const float *da, const float *dx, const int *incx, float *dy, const int *incy)
void symm< double >(const char *side, const char *uplo, const int *m, const int *n, const double *alpha, const double *A, const int *lda, const double *B, const int *ldb, const double *beta, double *C, const int *ldc)
Definition: mat_gblas.h:600
static void symm(const char *side, const char *uplo, const int *m, const int *n, const T *alpha, const T *A, const int *lda, const T *B, const int *ldb, const T *beta, T *C, const int *ldc)
Definition: mat_gblas.h:342
static void gemm(const char *ta, const char *tb, const int *n, const int *k, const int *l, const T *alpha, const T *A, const int *lda, const T *B, const int *ldb, const T *beta, T *C, const int *ldc)
Definition: mat_gblas.h:232
void stevr< float >(const char *jobz, const char *range, const int *n, float *d, float *e, const float *vl, const float *vu, const int *il, const int *iu, const float *abstol, int *m, float *w, float *z, const int *ldz, int *isuppz, float *work, int *lwork, int *iwork, int *liwork, int *info)
Definition: mat_gblas.h:992
static void pocon(const char *uplo, const int *n, const T *A, const int *lda, const T *anorm, T *rcond, T *work, int *iwork, int *info)
Definition: mat_gblas.h:351
static void spgst(const int *itype, const char *uplo, const int *n, T *ap, const T *bp, int *info)
Definition: mat_gblas.h:268
int template_blas_trmv(const char *uplo, const char *trans, const char *diag, const integer *n, const Treal *a, const integer *lda, Treal *x, const integer *incx)
Definition: template_blas_trmv.h:42
static void trmv(const char *uplo, const char *trans, const char *diag, const int *n, const T *A, const int *lda, T *x, const int *incx)
Definition: mat_gblas.h:409
void ssymm_(const char *side, const char *uplo, const int *m, const int *n, const float *alpha, const float *A, const int *lda, const float *B, const int *ldb, const float *beta, float *C, const int *ldc)
void ssyev_(const char *jobz, const char *uplo, const int *n, float *a, const int *lda, float *w, float *work, const int *lwork, int *info)
static void syrk(const char *uplo, const char *trans, const int *n, const int *k, const T *alpha, const T *A, const int *lda, const T *beta, T *C, const int *ldc)
Definition: mat_gblas.h:334
void ssygv_(const int *itype, const char *jobz, const char *uplo, const int *n, float *A, const int *lda, float *B, const int *ldb, float *w, float *work, const int *lwork, int *info)
Definition: allocate.cc:39
void ggev< float >(const char *jobbl, const char *jobvr, const int *n, float *A, const int *lda, float *B, const int *ldb, float *alphar, float *alphai, float *beta, float *vl, const int *ldvl, float *vr, const int *ldvr, float *work, const int *lwork, int *info)
Definition: mat_gblas.h:878
void scal< double >(const int *n, const double *da, double *dx, const int *incx)
Definition: mat_gblas.h:742
void daxpy_(const int *n, const double *da, const double *dx, const int *incx, double *dy, const int *incy)
void sscal_(const int *n, const float *da, float *dx, const int *incx)
int template_lapack_spgst(const integer *itype, const char *uplo, const integer *n, Treal *ap, const Treal *bp, integer *info)
Definition: template_lapack_spgst.h:43
void syev< double >(const char *jobz, const char *uplo, const int *n, double *a, const int *lda, double *w, double *work, const int *lwork, int *info)
Definition: mat_gblas.h:677
int template_blas_syrk(const char *uplo, const char *trans, const integer *n, const integer *k, const Treal *alpha, const Treal *a, const integer *lda, const Treal *beta, Treal *c__, const integer *ldc)
Definition: template_blas_syrk.h:42
void axpy< double >(const int *n, const double *da, const double *dx, const int *incx, double *dy, const int *incy)
Definition: mat_gblas.h:770
void ssymv_(const char *uplo, const int *n, const float *alpha, const float *A, const int *lda, const float *x, const int *incx, const float *beta, float *y, const int *incy)
Generalized matrix matrix multiplication using SSE intrinsics.
void tptri< double >(const char *uplo, const char *diag, const int *n, double *ap, int *info)
Definition: mat_gblas.h:492
int template_blas_symm(const char *side, const char *uplo, const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *lda, const Treal *b, const integer *ldb, const Treal *beta, Treal *c__, const integer *ldc)
Definition: template_blas_symm.h:42
void strmm_(const char *side, const char *uplo, const char *transa, const char *diag, const int *m, const int *n, const float *alpha, const float *A, const int *lda, float *B, const int *ldb)
void strtri_(const char *uplo, const char *diag, const int *n, float *A, const int *lda, int *info)
void gemm< float >(const char *ta, const char *tb, const int *n, const int *k, const int *l, const float *alpha, const float *A, const int *lda, const float *B, const int *ldb, const float *beta, float *C, const int *ldc)
Definition: mat_gblas.h:785
static void trtri(const char *uplo, const char *diag, const int *n, T *A, const int *lda, int *info)
Definition: mat_gblas.h:321
static void sygv(const int *itype, const char *jobz, const char *uplo, const int *n, T *A, const int *lda, T *B, const int *ldb, T *w, T *work, const int *lwork, int *info)
Definition: mat_gblas.h:293
void sygv< float >(const int *itype, const char *jobz, const char *uplo, const int *n, float *A, const int *lda, float *B, const int *ldb, float *w, float *work, const int *lwork, int *info)
Definition: mat_gblas.h:861
void spgst< double >(const int *itype, const char *uplo, const int *n, double *ap, const double *bp, int *info)
Definition: mat_gblas.h:478
static T dot(const int *n, const T *dx, const int *incx, const T *dy, const int *incy)
Definition: mat_gblas.h:425
void dsyev_(const char *jobz, const char *uplo, const int *n, double *a, const int *lda, double *w, double *work, const int *lwork, int *info)
void scal< float >(const int *n, const float *da, float *dx, const int *incx)
Definition: mat_gblas.h:1078
void dsyrk_(const char *uplo, const char *trans, const int *n, const int *k, const double *alpha, const double *A, const int *lda, const double *beta, double *C, const int *ldc)
static void potrf(const char *uplo, const int *n, T *A, const int *lda, int *info)
Definition: mat_gblas.h:314
static void syev(const char *jobz, const char *uplo, const int *n, T *a, const int *lda, T *w, T *work, const int *lwork, int *info)
Definition: mat_gblas.h:382
static void axpy(const int *n, const T *da, const T *dx, const int *incx, T *dy, const int *incy)
Definition: mat_gblas.h:431
static float time
Definition: mat_gblas.h:226
void syrk< float >(const char *uplo, const char *trans, const int *n, const int *k, const float *alpha, const float *A, const int *lda, const float *beta, float *C, const int *ldc)
Definition: mat_gblas.h:924
void dscal_(const int *n, const double *da, double *dx, const int *incx)
void syev< float >(const char *jobz, const char *uplo, const int *n, float *a, const int *lda, float *w, float *work, const int *lwork, int *info)
Definition: mat_gblas.h:1014
void dspgst_(const int *itype, const char *uplo, const int *n, double *ap, const double *bp, int *info)
int template_lapack_trtri(const char *uplo, const char *diag, const integer *n, Treal *a, const integer *lda, integer *info)
Definition: template_lapack_trtri.h:42
void trtri< float >(const char *uplo, const char *diag, const int *n, float *A, const int *lda, int *info)
Definition: mat_gblas.h:911
void ssyrk_(const char *uplo, const char *trans, const int *n, const int *k, const float *alpha, const float *A, const int *lda, const float *beta, float *C, const int *ldc)
static void symv(const char *uplo, const int *n, const T *alpha, const T *A, const int *lda, const T *x, const int *incx, const T *beta, T *y, const int *incy)
Definition: mat_gblas.h:400
void dtrtri_(const char *uplo, const char *diag, const int *n, double *A, const int *lda, int *info)
static void ggev(const char *jobbl, const char *jobvr, const int *n, T *A, const int *lda, T *B, const int *ldb, T *alphar, T *alphai, T *beta, T *vl, const int *ldvl, T *vr, const int *ldvr, T *work, const int *lwork, int *info)
Definition: mat_gblas.h:301
int template_lapack_pptrf(const char *uplo, const integer *n, Treal *ap, integer *info)
Definition: template_lapack_pptrf.h:43
int template_lapack_sygv(const integer *itype, const char *jobz, const char *uplo, const integer *n, Treal *a, const integer *lda, Treal *b, const integer *ldb, Treal *w, Treal *work, const integer *lwork, integer *info)
Definition: template_lapack_sygv.h:42
void stptri_(const char *uplo, const char *diag, const int *n, float *ap, int *info)
void stevx< double >(const char *jobz, const char *range, const int *n, double *d, double *e, const double *vl, const double *vu, const int *il, const int *iu, const double *abstol, int *m, double *w, double *z, const int *ldz, double *work, int *iwork, int *ifail, int *info)
Definition: mat_gblas.h:632
void ggev< double >(const char *jobbl, const char *jobvr, const int *n, double *A, const int *lda, double *B, const int *ldb, double *alphar, double *alphai, double *beta, double *vl, const int *ldvl, double *vr, const int *ldvr, double *work, const int *lwork, int *info)
Definition: mat_gblas.h:539
void potrf< double >(const char *uplo, const int *n, double *A, const int *lda, int *info)
Definition: mat_gblas.h:559
void dpptrf_(const char *uplo, const int *n, double *ap, int *info)
void spotrf_(const char *uplo, const int *n, float *A, const int *lda, int *info)
void gemm< double >(const char *ta, const char *tb, const int *n, const int *k, const int *l, const double *alpha, const double *A, const int *lda, const double *B, const int *ldb, const double *beta, double *C, const int *ldc)
Definition: mat_gblas.h:447
static void trmm(const char *side, const char *uplo, const char *transa, const char *diag, const int *m, const int *n, const T *alpha, const T *A, const int *lda, T *B, const int *ldb)
Definition: mat_gblas.h:281
void dtptri_(const char *uplo, const char *diag, const int *n, double *ap, int *info)
int template_lapack_tptri(const char *uplo, const char *diag, const integer *n, Treal *ap, integer *info)
Definition: template_lapack_tptri.h:43
void tptri< float >(const char *uplo, const char *diag, const int *n, float *ap, int *info)
Definition: mat_gblas.h:830
void dsymm_(const char *side, const char *uplo, const int *m, const int *n, const double *alpha, const double *A, const int *lda, const double *B, const int *ldb, const double *beta, double *C, const int *ldc)
void dpotrf_(const char *uplo, const int *n, double *A, const int *lda, int *info)
void pocon< double >(const char *uplo, const int *n, const double *A, const int *lda, const double *anorm, double *rcond, double *work, int *iwork, int *info)
Definition: mat_gblas.h:617
void dstevx_(const char *jobz, const char *range, const int *n, double *d, double *e, const double *vl, const double *vu, const int *il, const int *iu, const double *abstol, int *m, double *w, double *z, const int *ldz, double *work, int *iwork, int *ifail, int *info)
void stevr< double >(const char *jobz, const char *range, const int *n, double *d, double *e, const double *vl, const double *vu, const int *il, const int *iu, const double *abstol, int *m, double *w, double *z, const int *ldz, int *isuppz, double *work, int *lwork, int *iwork, int *liwork, int *info)
Definition: mat_gblas.h:653
void trmv< double >(const char *uplo, const char *trans, const char *diag, const int *n, const double *A, const int *lda, double *x, const int *incx)
Definition: mat_gblas.h:725
int template_blas_gemv(const char *trans, const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *lda, const Treal *x, const integer *incx, const Treal *beta, Treal *y, const integer *incy)
Definition: template_blas_gemv.h:43
void dstevr_(const char *jobz, const char *range, const int *n, double *d, double *e, const double *vl, const double *vu, const int *il, const int *iu, const double *abstol, int *m, double *w, double *z, const int *ldz, int *isuppz, double *work, int *lwork, int *iwork, int *liwork, int *info)
static void scal(const int *n, const T *da, T *dx, const int *incx)
Definition: mat_gblas.h:419
side
Definition: Matrix.h:75
int template_lapack_syev(const char *jobz, const char *uplo, const integer *n, Treal *a, const integer *lda, Treal *w, Treal *work, const integer *lwork, integer *info)
Definition: template_lapack_syev.h:42
Code for managing aligned memory buffers, used if SSE intrinsics enabled.
static void fulltopacked(const Treal *full, Treal *packed, const int size)
Definition: mat_gblas.h:1135
static void gemv(const char *ta, const int *m, const int *n, const T *alpha, const T *A, const int *lda, const T *x, const int *incx, const T *beta, T *y, const int *incy)
Definition: mat_gblas.h:391
int template_blas_axpy(const integer *n, const Treal *da, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_axpy.h:43
double ddot_(const int *n, const double *dx, const int *incx, const double *dy, const int *incy)
void trtri< double >(const char *uplo, const char *diag, const int *n, double *A, const int *lda, int *info)
Definition: mat_gblas.h:572
void sspgst_(const int *itype, const char *uplo, const int *n, float *ap, const float *bp, int *info)
void sstevr_(const char *jobz, const char *range, const int *n, float *d, float *e, const float *vl, const float *vu, const int *il, const int *iu, const float *abstol, int *m, float *w, float *z, const int *ldz, int *isuppz, float *work, int *lwork, int *iwork, int *liwork, int *info)
static void stevr(const char *jobz, const char *range, const int *n, T *d, T *e, const T *vl, const T *vu, const int *il, const int *iu, const T *abstol, int *m, T *w, T *z, const int *ldz, int *isuppz, T *work, int *lwork, int *iwork, int *liwork, int *info)
Definition: mat_gblas.h:369
void spocon_(const char *uplo, const int *n, const float *A, const int *lda, const float *anorm, float *rcond, float *work, int *iwork, int *info)
static void pptrf(const char *uplo, const int *n, T *ap, int *info)
Definition: mat_gblas.h:263
void trmm< double >(const char *side, const char *uplo, const char *transa, const char *diag, const int *m, const int *n, const double *alpha, const double *A, const int *lda, double *B, const int *ldb)
Definition: mat_gblas.h:505
void syrk< double >(const char *uplo, const char *trans, const int *n, const int *k, const double *alpha, const double *A, const int *lda, const double *beta, double *C, const int *ldc)
Definition: mat_gblas.h:585
int template_blas_symv(const char *uplo, const integer *n, const Treal *alpha, const Treal *a, const integer *lda, const Treal *x, const integer *incx, const Treal *beta, Treal *y, const integer *incy)
Definition: template_blas_symv.h:42
static void tptri(const char *uplo, const char *diag, const int *n, T *ap, int *info)
Definition: mat_gblas.h:275
void dgemv_(const char *ta, const int *m, const int *n, const double *alpha, const double *A, const int *lda, const double *x, const int *incx, const double *beta, double *y, const int *incy)
int template_lapack_stevr(const char *jobz, const char *range, const integer *n, Treal *d__, Treal *e, const Treal *vl, const Treal *vu, const integer *il, const integer *iu, const Treal *abstol, integer *m, Treal *w, Treal *z__, const integer *ldz, integer *isuppz, Treal *work, integer *lwork, integer *iwork, integer *liwork, integer *info)
Definition: template_lapack_stevr.h:41
void symv< double >(const char *uplo, const int *n, const double *alpha, const double *A, const int *lda, const double *x, const int *incx, const double *beta, double *y, const int *incy)
Definition: mat_gblas.h:709
int template_blas_gemm(const char *transa, const char *transb, const integer *m, const integer *n, const integer *k, const Treal *alpha, const Treal *a, const integer *lda, const Treal *b, const integer *ldb, const Treal *beta, Treal *c__, const integer *ldc)
Definition: template_blas_gemm.h:43
void sgemm_(const char *ta, const char *tb, const int *n, const int *k, const int *l, const float *alpha, const float *A, const int *lda, const float *B, const int *ldb, const float *beta, float *C, const int *ldc)
#define B
static void stevx(const char *jobz, const char *range, const int *n, T *d, T *e, const T *vl, const T *vu, const int *il, const int *iu, const T *abstol, int *m, T *w, T *z, const int *ldz, T *work, int *iwork, int *ifail, int *info)
Definition: mat_gblas.h:358
void trmv< float >(const char *uplo, const char *trans, const char *diag, const int *n, const float *A, const int *lda, float *x, const int *incx)
Definition: mat_gblas.h:1062
void sstevx_(const char *jobz, const char *range, const int *n, float *d, float *e, const float *vl, const float *vu, const int *il, const int *iu, const float *abstol, int *m, float *w, float *z, const int *ldz, float *work, int *iwork, int *ifail, int *info)
int template_lapack_ggev(const char *jobvl, const char *jobvr, const integer *n, Treal *a, const integer *lda, Treal *b, const integer *ldb, Treal *alphar, Treal *alphai, Treal *beta, Treal *vl, const integer *ldvl, Treal *vr, const integer *ldvr, Treal *work, const integer *lwork, integer *info)
Definition: template_lapack_ggev.h:42
void symv< float >(const char *uplo, const int *n, const float *alpha, const float *A, const int *lda, const float *x, const int *incx, const float *beta, float *y, const int *incy)
Definition: mat_gblas.h:1046
void sggev_(const char *jobbl, const char *jobvr, const int *n, float *A, const int *lda, float *B, const int *ldb, float *alphar, float *alphai, float *beta, float *vl, const int *ldvl, float *vr, const int *ldvr, float *work, const int *lwork, int *info)
int template_lapack_stevx(const char *jobz, const char *range, const integer *n, Treal *d__, Treal *e, const Treal *vl, const Treal *vu, const integer *il, const integer *iu, const Treal *abstol, integer *m, Treal *w, Treal *z__, const integer *ldz, Treal *work, integer *iwork, integer *ifail, integer *info)
Definition: template_lapack_stevx.h:42
The Failure class is used for exception handling.
static void tripackedtofull(const Treal *packed, Treal *full, const int size)
Definition: mat_gblas.h:1170
Treal template_blas_dot(const integer *n, const Treal *dx, const integer *incx, const Treal *dy, const integer *incy)
Definition: template_blas_dot.h:43
static void packedtofull(const Treal *packed, Treal *full, const int size)
Definition: mat_gblas.h:1148
void strmv_(const char *uplo, const char *trans, const char *diag, const int *n, const float *A, const int *lda, float *x, const int *incx)