#include <mm_kernel_inner_sse2_A.h>
|
static void | exec (PtrType X, PtrTypePacked X_packed, int const row_k, int const col_k, int const rows_total_matrix, int const cols_total_matrix) |
|
static void | exec (real const *const *const A, real const *const *const B, real *const C, int const i=1, int const offset_A=0, int const offset_B=0, int const offset_C=0) |
| Executes the matrix-matrix multiply C += A B with the three matrices A, B, and C stored according to the static members and typedefs of this class.
|
|
static void | exec (real const *const *const A, real const *const *const B, real *const C, int const i=1) |
|
|
static int const | M |
| Number of rows of A and C.
|
|
static int const | N |
| Number of columns of B and C.
|
|
static int const | K |
| Number of columns of A and rows of B.
|
|
◆ Pack_type_A
Type that can (should) be used to pack A.
◆ Pack_type_B
Type that can (should) be used to pack B.
◆ Pack_type_C
Type that can (should) be used to pack C.
◆ PtrType
template<typename T_real, typename T_reg, int T_M, int T_N, int T_K>
template<int T_rows, int T_cols, typename T_ordering_kernel, int T_repetitions>
template<typename T_ordering_matrix>
typedef real const* const MM_kernel_inner_sse2_A< T_real, T_reg, T_M, T_N, T_K >::Pack< T_rows, T_cols, T_ordering_kernel, T_repetitions >::Assign_to_packed< T_ordering_matrix >::PtrType |
Type of matrix pointer - note the presence of const qualifiers.
◆ PtrTypePacked
template<typename T_real, typename T_reg, int T_M, int T_N, int T_K>
template<int T_rows, int T_cols, typename T_ordering_kernel, int T_repetitions>
template<typename T_ordering_matrix>
typedef real* MM_kernel_inner_sse2_A< T_real, T_reg, T_M, T_N, T_K >::Pack< T_rows, T_cols, T_ordering_kernel, T_repetitions >::Assign_to_packed< T_ordering_matrix >::PtrTypePacked |
Type of packed pointer - note the absence of const qualifiers.
◆ real
Real number type (usually float or double).
◆ exec() [1/3]
template<typename T_real, typename T_reg, int T_M, int T_N, int T_K>
template<int T_rows, int T_cols, typename T_ordering_kernel, int T_repetitions>
template<typename T_ordering_matrix>
static void MM_kernel_inner_sse2_A< T_real, T_reg, T_M, T_N, T_K >::Pack< T_rows, T_cols, T_ordering_kernel, T_repetitions >::Assign_to_packed< T_ordering_matrix >::exec |
( |
PtrType | X, |
|
|
PtrTypePacked | X_packed, |
|
|
int const | row_k, |
|
|
int const | col_k, |
|
|
int const | rows_total_matrix, |
|
|
int const | cols_total_matrix ) |
|
inline static |
◆ exec() [2/3]
◆ exec() [3/3]
void MM_kernel_inner_sse2_A< real, T_reg, T_M, T_N, T_K >::exec |
( |
real const *const *const | A, |
|
|
real const *const *const | B, |
|
|
real *const | C, |
|
|
int const | i = 1, |
|
|
int const | offset_A = 0, |
|
|
int const | offset_B = 0, |
|
|
int const | offset_C = 0 ) |
|
static |
Executes the matrix-matrix multiply C += A B with the three matrices A, B, and C stored according to the static members and typedefs of this class.
◆ floats_per_register
Number of real numbers that fit in one register.
◆ K
Number of columns of A and rows of B.
◆ M
Number of rows of A and C.
◆ N
Number of columns of B and C.
The documentation for this struct was generated from the following file: