ergo
MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block > Class Template Reference

Template for matrix-matrix multiplication that wraps around a kernel given as a template argument. More...

#include <mm_kernel_outer_A.h>

Classes

class  Pack
 Template for translations between unpacked and packed matrix storage. More...
 

Public Types

typedef T_gemm_kernel::real real
 Real number type (usually float or double) More...
 
typedef Ordering_col_wise Ordering_block_A
 
typedef Ordering_col_wise Ordering_block_B
 
typedef Ordering_col_wise Ordering_block_C
 
typedef Pack< M_block, K_block, Ordering_block_A, typename T_gemm_kernel::Pack_type_A > Pack_type_A
 
typedef Pack< K_block, N_block, Ordering_block_B, typename T_gemm_kernel::Pack_type_B > Pack_type_B
 
typedef Pack< M_block, N_block, Ordering_block_C, typename T_gemm_kernel::Pack_type_C > Pack_type_C
 

Static Public Member Functions

static void exec (real const *const *const A, real const *const *const B, real *const C, int const i=1)
 Executes the matrix-matrix multiply C += A B with the three matrices A, B, and C stored using the packing types of this class. More...
 

Static Public Attributes

static int const M_kernel = T_gemm_kernel::M
 Number of rows of A and C kernels. More...
 
static int const N_kernel = T_gemm_kernel::N
 Number of columns of B and C kernels. More...
 
static int const K_kernel = T_gemm_kernel::K
 Number of columns of A kernels and rows of B kernels. More...
 
static int const M_block = T_M_block
 Number of rows of A and C (blocks). More...
 
static int const N_block = T_N_block
 Number of columns of B and C (blocks). More...
 
static int const K_block = 1
 Number of columns of A and rows of B (blocks). More...
 
static int const M = M_kernel * M_block
 Number of rows of A and C. More...
 
static int const N = N_kernel * N_block
 Number of columns of B and C. More...
 
static int const K = K_kernel * K_block
 Number of columns of A and rows of B. More...
 

Detailed Description

template<typename T_gemm_kernel, int T_M_block, int T_N_block>
class MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >

Template for matrix-matrix multiplication that wraps around a kernel given as a template argument.

The idea is that the inner kernel should be fully unrolled and blocked for registers.

Member Typedef Documentation

◆ Ordering_block_A

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
typedef Ordering_col_wise MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Ordering_block_A

◆ Ordering_block_B

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
typedef Ordering_col_wise MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Ordering_block_B

◆ Ordering_block_C

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
typedef Ordering_col_wise MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Ordering_block_C

◆ Pack_type_A

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
typedef Pack< M_block, K_block, Ordering_block_A, typename T_gemm_kernel::Pack_type_A > MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Pack_type_A

◆ Pack_type_B

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
typedef Pack< K_block, N_block, Ordering_block_B, typename T_gemm_kernel::Pack_type_B > MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Pack_type_B

◆ Pack_type_C

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
typedef Pack< M_block, N_block, Ordering_block_C, typename T_gemm_kernel::Pack_type_C > MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::Pack_type_C

◆ real

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
typedef T_gemm_kernel::real MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::real

Real number type (usually float or double)

Member Function Documentation

◆ exec()

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
void MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::exec ( real const *const *const  A,
real const *const *const  B,
real *const  C,
int const  i = 1 
)
static

Executes the matrix-matrix multiply C += A B with the three matrices A, B, and C stored using the packing types of this class.

References A, and B.

Member Data Documentation

◆ K

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::K = K_kernel * K_block
static

Number of columns of A and rows of B.

◆ K_block

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::K_block = 1
static

Number of columns of A and rows of B (blocks).

◆ K_kernel

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::K_kernel = T_gemm_kernel::K
static

Number of columns of A kernels and rows of B kernels.

◆ M

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::M = M_kernel * M_block
static

Number of rows of A and C.

◆ M_block

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::M_block = T_M_block
static

Number of rows of A and C (blocks).

◆ M_kernel

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::M_kernel = T_gemm_kernel::M
static

Number of rows of A and C kernels.

◆ N

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::N = N_kernel * N_block
static

Number of columns of B and C.

◆ N_block

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::N_block = T_N_block
static

Number of columns of B and C (blocks).

◆ N_kernel

template<typename T_gemm_kernel , int T_M_block, int T_N_block>
int const MM_kernel_outer_A< T_gemm_kernel, T_M_block, T_N_block >::N_kernel = T_gemm_kernel::N
static

Number of columns of B and C kernels.


The documentation for this class was generated from the following file: