INLA_DIST
Loading...
Searching...
No Matches
Public Member Functions | Private Member Functions | Private Attributes | List of all members
BTA< T > Class Template Reference

Template class for Block Triangular Arrowhead Solver. More...

#include <BTA.H>

Public Member Functions

 BTA (size_t ns, size_t nt, size_t nd, int GPU_rank_)
 Constructor for BTA Solver.
 
 ~BTA ()
 Destructor for BTA class.
 
double factorize (size_t *ia, size_t *ja, T *a, double &t_firstStageFactor)
 Perform factorization of sparse matrix in CSC format.
 
double factorize_noCopyHost (size_t *ia, size_t *ja, T *a, T &logDet)
 Perform factorization on the given matrix without copying factor back to host.
 
double factorizeSolve (size_t *ia, size_t *ja, T *a, T *x, T *rhs, size_t nrhs, double &t_firstSecondStage, double &t_SecondStageBackPass)
 Perform factorization and solve the linear system for multiple right-hand sides. The forward substitution of the solve already runs on the GPU while the factorization is being computed.
 
double solve (size_t *ia, size_t *ja, T *a, T *rhs, size_t nrhs, double &t_secondStageForwardPass, double &t_secondStageBackwardPass)
 Solve the linear system when the Cholesky factor is already computed.
 
double solve (size_t *, size_t *, T *, T *, T *, size_t, double &t_secondStageForwardPass, double &t_secondStageBackwardPass)
 Solve the linear system for a single right-hand side.
 
double solve_s (size_t *ia, size_t *ja, double *a, double *x, double *rhs, size_t nrhs)
 single precision solve but assuming double precision input.
 
double solve_d (size_t *, size_t *, float *, float *, float *, size_t)
 double precision solve but assuming single precision input.
 
double BTAdiag (size_t *ia, size_t *ja, T *a, T *diag)
 compute selected inverse. return only the diagonal of the inverse.
 
double BTAinvBlks (size_t *, size_t *, T *, T *)
 
double BTAselInv (size_t *ia, size_t *ja, T *a, T *invQ)
 compute selected inverse. return all elements of the inverse that were nonzero in Q and are within the diagonal block.
 
T logDet (size_t *ia, size_t *ja, T *a)
 compute log determinant.
 
double residualNorm (T *x, T *b)
 compute residual norm as || r || = || b - A*x ||.
 
double residualNormNormalized (T *x, T *b)
 compute residual norm as || rel r || = || b - A*x || / || b ||.
 
double flop_count_factorise ()
 

Private Member Functions

size_t mf_block_index (size_t, size_t)
 
size_t mf_block_lda (size_t, size_t)
 
size_t mf_dense_block_index (size_t)
 
size_t mf_dense_block_offset (size_t)
 
size_t mf_dense_block_lda (size_t)
 
size_t invblks_diag_block_index (size_t)
 
size_t invblks_dense_block_index (size_t)
 
double FirstStageFactor ()
 
double FirstSecondStageFactor (size_t nhrs)
 
double FirstStageFactor_noCopyHost (T &logDet)
 
double FirstStageFactor_noCopyHost_testV (double &logDet)
 
double ForwardPassSolve (size_t)
 
double BackwardPassSolve (size_t)
 
double SecondStageSolve (size_t, double &t_secondStageForwardPass, double &t_secondStageBackwardPass)
 
double SecondStageSolve_s (size_t, float *rhs_s)
 
double SecondStageSolve_d (size_t, double *rhs_d)
 
double ThirdStageBTA (T *, T *, int)
 
void initialize_MF_host ()
 
void initialize_invBlks_host ()
 
void get_max_supernode_nnz ()
 
void init_supernode (T *M_dev, size_t supernode, cudaStream_t stream)
 
void copy_supernode_to_host (T *M_dev, size_t supernode, cudaStream_t stream)
 
void extract_nnzA (T *M_dev, size_t supernode)
 
void copy_supernode_to_host_write (T *M_dev, size_t supernode)
 
void copy_supernode_to_device (T *M_dev, size_t supernode, cudaStream_t stream)
 
void copy_supernode_diag (T *src, size_t supernode)
 
void swap_pointers (T **ptr1, T **ptr2)
 
f_one ()
 
f_zero ()
 
CPX f_one ()
 
float f_one ()
 
CPX f_zero ()
 
float f_zero ()
 

Private Attributes

size_t * matrix_ia
 
size_t * matrix_ja
 
T * matrix_a
 
size_t matrix_size
 
size_t matrix_n_nonzeros
 
size_t matrix_ns
 
size_t matrix_nt
 
size_t matrix_nd
 
size_t * Bmin
 
size_t * Bmax
 
size_t NBlock
 
size_t * diag_pos
 
size_t max_supernode_nnz = 0
 
size_t ind_invBlks_fi
 
size_t mem_alloc_dev = 0
 
int GPU_rank
 
bool MF_allocated = false
 
bool invBlks_allocated = false
 
bool MF_dev_allocated = false
 
bool factorization_completed = false
 
int cpy_indicator
 
magma_queue_t magma_queue_1
 
magma_queue_t magma_queue_2
 
cudaStream_t stream_c
 
cudaStream_t copyStream = NULL
 
cudaStream_t magma_cudaStream_1 = NULL
 
cudaStream_t magma_cudaStream_2 = NULL
 
cudaEvent_t initBlock_dev_ev
 
cudaEvent_t potrf_dev_ev
 
magma_event_t potrf_dev_magma_ev
 
void * cublas_handle
 
int * info_cuda = NULL
 
int * cuda_buffer_flag_potrf
 
int * cuda_buffer_flag_trtri
 
cusolverDnHandle_t * handle
 
cusolverDnParams_t * params
 
size_t * dev_size
 
size_t * host_size
 
double * mem_cuda_dev
 
double * mem_cuda_host
 
T * MF
 
T * invBlks
 
T * inv_a
 
T * blockR_dev
 
T * blockM_dev
 
T * blockDense_dev
 
T * rhs
 
T * rhs_dev
 
size_t * ia_dev
 
size_t * ja_dev
 
T * a_dev
 
size_t * inv_ia_dev
 
size_t * inv_ja_dev
 
T * inv_a_dev
 
T * diag_dev
 
size_t * diag_pos_dev
 

Detailed Description

template<class T>
class BTA< T >

Template class for Block Triangular Arrowhead Solver.

Template Parameters
T: Data type of the matrix elements. Supported choices are double and single precision (some remnants of complex support remain).

Constructor & Destructor Documentation

◆ BTA()

template<class T >
BTA< T >::BTA ( size_t  ns,
size_t  nt,
size_t  nd,
int  GPU_rank_ 
)

Constructor for BTA Solver.

Parameters
ns: Number of spatial nodes.
nt: Number of timesteps.
nd: Number of fixed effects.
GPU_rank_: GPU rank.

Member Function Documentation

◆ BTAdiag()

template<class T >
double BTA< T >::BTAdiag ( size_t *  ia,
size_t *  ja,
T *  a,
T *  diag 
)

compute selected inverse. return only the diagonal of the inverse.

Parameters
[in] ia: Array of type size_t storing the row pointers of the matrix.
[in] ja: Array of type size_t storing the column indices of the matrix.
[in] a: Array of type T storing the values of the matrix.
[in,out] diag: Array of type T storing the diagonal of the inverse.
Returns
number of GFLOP/S.

◆ BTAselInv()

template<class T >
double BTA< T >::BTAselInv ( size_t *  ia,
size_t *  ja,
T *  a,
T *  invQ 
)

compute selected inverse. return all elements of the inverse that were nonzero in Q and are within the diagonal block.

Parameters
[in] ia: Array of type size_t storing the row pointers of the matrix.
[in] ja: Array of type size_t storing the column indices of the matrix.
[in] a: Array of type T storing the values of the matrix.
[in,out] invQ: Array of type T storing the selected entries of the inverse (those that were nonzero in Q and lie within the diagonal block).
Returns
number of GFLOP/S.

◆ factorize()

template<class T >
double BTA< T >::factorize ( size_t *  ia,
size_t *  ja,
T *  a,
double &  t_firstStageFactor 
)

Perform factorization of sparse matrix in CSC format.

Parameters
[in] ia: Array of type size_t storing the row pointers of the matrix.
[in] ja: Array of type size_t storing the column indices of the matrix.
[in] a: Array of type T storing the values of the matrix.
[in,out] t_firstStageFactor: Time taken for the first-stage factorization.
Returns
number of GFLOP/S.

◆ factorize_noCopyHost()

template<class T >
double BTA< T >::factorize_noCopyHost ( size_t *  ia,
size_t *  ja,
T *  a,
T &  logDet 
)

Perform factorization on the given matrix without copying factor back to host.

Parameters
[in] ia: Array of type size_t storing the row pointers of the matrix.
[in] ja: Array of type size_t storing the column indices of the matrix.
[in] a: Array of type T storing the values of the matrix.
[in,out] logDet: Log determinant of the matrix.
Returns
number of GFLOP/S.

◆ factorizeSolve()

template<class T >
double BTA< T >::factorizeSolve ( size_t *  ia,
size_t *  ja,
T *  a,
T *  x,
T *  rhs,
size_t  nrhs,
double &  t_firstSecondStage,
double &  t_SecondStageBackPass 
)

Perform factorization and solve the linear system for multiple right-hand sides. The forward substitution of the solve already runs on the GPU while the factorization is being computed.

Parameters
[in] ia: Array of type size_t storing the row pointers of the matrix.
[in] ja: Array of type size_t storing the column indices of the matrix.
[in] a: Array of type T storing the values of the matrix.
[in,out] x: Array of type T storing the solutions.
[in] rhs: Array of type T storing the right-hand sides.
[in] nrhs: Number of right-hand sides.
[in,out] t_firstSecondStage: Time taken for the factorization and the forward solve.
[in,out] t_SecondStageBackPass: Time taken for the backward pass.
Returns
number of GFLOP/S.

◆ logDet()

template<class T >
T BTA< T >::logDet ( size_t *  ia,
size_t *  ja,
T *  a 
)

compute log determinant.

Parameters
[in] ia: Array of type size_t storing the row pointers of the matrix.
[in] ja: Array of type size_t storing the column indices of the matrix.
[in] a: Array of type T storing the values of the matrix.
Returns
log determinant.

◆ residualNorm()

template<class T >
double BTA< T >::residualNorm ( T *  x,
T *  b 
)

compute residual norm as || r || = || b - A*x ||.

Parameters
[in] x: Array of type T storing the solution vector x.
[in] b: Array of type T storing the right-hand side vector b.
Returns
|| r || = || b - A*x ||.

◆ residualNormNormalized()

template<class T >
double BTA< T >::residualNormNormalized ( T *  x,
T *  b 
)

compute residual norm as || rel r || = || b - A*x || / || b ||.

Parameters
[in] x: Array of type T storing the solution vector x.
[in] b: Array of type T storing the right-hand side vector b.
Returns
|| rel r|| = || b - A*x || / || b || .

◆ solve() [1/2]

template<class T >
double BTA< T >::solve ( size_t *  ia,
size_t *  ja,
T *  a,
T *  x,
T *  b,
size_t  nrhs,
double &  t_secondStageForwardPass,
double &  t_secondStageBackwardPass 
)

Solve the linear system for a single right-hand side.

Parameters
[in] ia: Array of type size_t storing the row pointers of the matrix.
[in] ja: Array of type size_t storing the column indices of the matrix.
[in] a: Array of type T storing the values of the matrix.
[in,out] x: Array of type T storing the solution.
[in] b: Array of type T storing the right-hand side.
[in] nrhs: Number of right-hand sides.
[in,out] t_secondStageForwardPass: Time taken for the forward pass.
[in,out] t_secondStageBackwardPass: Time taken for the backward pass.
Returns
number of GFLOP/S.

◆ solve() [2/2]

template<class T >
double BTA< T >::solve ( size_t *  ia,
size_t *  ja,
T *  a,
T *  rhs,
size_t  nrhs,
double &  t_secondStageForwardPass,
double &  t_secondStageBackwardPass 
)

Solve the linear system when the Cholesky factor is already computed.

Parameters
[in] ia: Array of type size_t storing the row pointers of the matrix.
[in] ja: Array of type size_t storing the column indices of the matrix.
[in] a: Array of type T storing the values of the matrix.
[in] rhs: Right-hand side(s); overwritten by the solution.
[in] nrhs: Number of right-hand sides.
[in,out] t_secondStageForwardPass: Time taken for the forward pass.
[in,out] t_secondStageBackwardPass: Time taken for the backward pass.
Returns
number of GFLOP/S.

◆ solve_d()

template<class T >
double BTA< T >::solve_d ( size_t *  ia,
size_t *  ja,
float *  a,
float *  x,
float *  b,
size_t  nrhs 
)

double precision solve but assuming single precision input.

Parameters
[in] ia: Array of type size_t storing the row pointers of the matrix.
[in] ja: Array of type size_t storing the column indices of the matrix.
[in] a: Array of type float storing the values of the matrix (single precision).
[in,out] x: Array of type float storing the solution.
[in] b: Array of type float storing the right-hand side (single precision).
[in] nrhs: Number of right-hand sides.
Returns
number of GFLOP/S.

◆ solve_s()

template<class T >
double BTA< T >::solve_s ( size_t *  ia,
size_t *  ja,
double *  a,
double *  x,
double *  rhs,
size_t  nrhs 
)

single precision solve but assuming double precision input.

Parameters
[in] ia: Array of type size_t storing the row pointers of the matrix.
[in] ja: Array of type size_t storing the column indices of the matrix.
[in] a: Array of type double storing the values of the matrix (double precision).
[in,out] x: Array of type double storing the solution.
[in] rhs: Array of type double storing the right-hand side (double precision).
[in] nrhs: Number of right-hand sides.
Returns
number of GFLOP/S.

Member Data Documentation

◆ GPU_rank

template<class T >
int BTA< T >::GPU_rank
private

GPU rank of the current MPI process.

◆ matrix_a

template<class T >
T* BTA< T >::matrix_a
private

array of type T storing the values of the sparse input matrix.

◆ matrix_ia

template<class T >
size_t* BTA< T >::matrix_ia
private

array of type size_t storing the row pointers of the sparse input matrix.

◆ matrix_ja

template<class T >
size_t* BTA< T >::matrix_ja
private

array of type size_t storing the column indices of the sparse input matrix.

◆ matrix_n_nonzeros

template<class T >
size_t BTA< T >::matrix_n_nonzeros
private

number of nonzeros in the block-formatted matrix (not in the sparse input matrix).

◆ matrix_nd

template<class T >
size_t BTA< T >::matrix_nd
private

number of dense rows in the arrowhead.

◆ matrix_ns

template<class T >
size_t BTA< T >::matrix_ns
private

size of diagonal blocks, i.e. number of spatial nodes.

◆ matrix_nt

template<class T >
size_t BTA< T >::matrix_nt
private

number of large diagonal blocks, i.e. number of time steps.

◆ matrix_size

template<class T >
size_t BTA< T >::matrix_size
private

dimension of the matrix.


The documentation for this class was generated from the following file: