INLA_DIST
|
Template class for Block Triangular Arrowhead Solver. More...
#include <BTA.H>
Public Member Functions | |
BTA (size_t ns, size_t nt, size_t nd, int GPU_rank_) | |
Constructor for BTA Solver. | |
~BTA () | |
Destructor for BTA class. | |
double | factorize (size_t *ia, size_t *ja, T *a, double &t_firstStageFactor) |
Perform factorization of sparse matrix in CSC format. | |
double | factorize_noCopyHost (size_t *ia, size_t *ja, T *a, T &logDet) |
Perform factorization on the given matrix without copying factor back to host. | |
double | factorizeSolve (size_t *ia, size_t *ja, T *a, T *x, T *rhs, size_t nrhs, double &t_firstSecondStage, double &t_SecondStageBackPass) |
Perform factorization and solve the linear system for multiple right-hand sides. The forward substitution of the solve is already performed on the GPU while the factorization is being computed. | |
double | solve (size_t *ia, size_t *ja, T *a, T *rhs, size_t nrhs, double &t_secondStageForwardPass, double &t_secondStageBackwardPass) |
Solve the linear system when the Cholesky factor is already computed. | |
double | solve (size_t *, size_t *, T *, T *, T *, size_t, double &t_secondStageForwardPass, double &t_secondStageBackwardPass) |
Solve the linear system for a single right-hand side. | |
double | solve_s (size_t *ia, size_t *ja, double *a, double *x, double *rhs, size_t nrhs) |
single precision solve but assuming double precision input. | |
double | solve_d (size_t *, size_t *, float *, float *, float *, size_t) |
double precision solve but assuming single precision input. | |
double | BTAdiag (size_t *ia, size_t *ja, T *a, T *diag) |
compute selected inverse. return only the diagonal of the inverse. | |
double | BTAinvBlks (size_t *, size_t *, T *, T *) |
double | BTAselInv (size_t *ia, size_t *ja, T *a, T *invQ) |
compute selected inverse. return all elements of the inverse that were nonzero in Q and are within the diagonal block. | |
T | logDet (size_t *ia, size_t *ja, T *a) |
compute log determinant. | |
double | residualNorm (T *x, T *b) |
compute residual norm as || r || = || b - A*x ||. | |
double | residualNormNormalized (T *x, T *b) |
compute residual norm as || rel r || = || b - A*x || / || b ||. | |
double | flop_count_factorise () |
Private Member Functions | |
size_t | mf_block_index (size_t, size_t) |
size_t | mf_block_lda (size_t, size_t) |
size_t | mf_dense_block_index (size_t) |
size_t | mf_dense_block_offset (size_t) |
size_t | mf_dense_block_lda (size_t) |
size_t | invblks_diag_block_index (size_t) |
size_t | invblks_dense_block_index (size_t) |
double | FirstStageFactor () |
double | FirstSecondStageFactor (size_t nhrs) |
double | FirstStageFactor_noCopyHost (T &logDet) |
double | FirstStageFactor_noCopyHost_testV (double &logDet) |
double | ForwardPassSolve (size_t) |
double | BackwardPassSolve (size_t) |
double | SecondStageSolve (size_t, double &t_secondStageForwardPass, double &t_secondStageBackwardPass) |
double | SecondStageSolve_s (size_t, float *rhs_s) |
double | SecondStageSolve_d (size_t, double *rhs_d) |
double | ThirdStageBTA (T *, T *, int) |
void | initialize_MF_host () |
void | initialize_invBlks_host () |
void | get_max_supernode_nnz () |
void | init_supernode (T *M_dev, size_t supernode, cudaStream_t stream) |
void | copy_supernode_to_host (T *M_dev, size_t supernode, cudaStream_t stream) |
void | extract_nnzA (T *M_dev, size_t supernode) |
void | copy_supernode_to_host_write (T *M_dev, size_t supernode) |
void | copy_supernode_to_device (T *M_dev, size_t supernode, cudaStream_t stream) |
void | copy_supernode_diag (T *src, size_t supernode) |
void | swap_pointers (T **ptr1, T **ptr2) |
T | f_one () |
T | f_zero () |
CPX | f_one () |
float | f_one () |
CPX | f_zero () |
float | f_zero () |
Private Attributes | |
size_t * | matrix_ia |
size_t * | matrix_ja |
T * | matrix_a |
size_t | matrix_size |
size_t | matrix_n_nonzeros |
size_t | matrix_ns |
size_t | matrix_nt |
size_t | matrix_nd |
size_t * | Bmin |
size_t * | Bmax |
size_t | NBlock |
size_t * | diag_pos |
size_t | max_supernode_nnz = 0 |
size_t | ind_invBlks_fi |
size_t | mem_alloc_dev = 0 |
int | GPU_rank |
bool | MF_allocated = false |
bool | invBlks_allocated = false |
bool | MF_dev_allocated = false |
bool | factorization_completed = false |
int | cpy_indicator |
magma_queue_t | magma_queue_1 |
magma_queue_t | magma_queue_2 |
cudaStream_t | stream_c |
cudaStream_t | copyStream = NULL |
cudaStream_t | magma_cudaStream_1 = NULL |
cudaStream_t | magma_cudaStream_2 = NULL |
cudaEvent_t | initBlock_dev_ev |
cudaEvent_t | potrf_dev_ev |
magma_event_t | potrf_dev_magma_ev |
void * | cublas_handle |
int * | info_cuda = NULL |
int * | cuda_buffer_flag_potrf |
int * | cuda_buffer_flag_trtri |
cusolverDnHandle_t * | handle |
cusolverDnParams_t * | params |
size_t * | dev_size |
size_t * | host_size |
double * | mem_cuda_dev |
double * | mem_cuda_host |
T * | MF |
T * | invBlks |
T * | inv_a |
T * | blockR_dev |
T * | blockM_dev |
T * | blockDense_dev |
T * | rhs |
T * | rhs_dev |
size_t * | ia_dev |
size_t * | ja_dev |
T * | a_dev |
size_t * | inv_ia_dev |
size_t * | inv_ja_dev |
T * | inv_a_dev |
T * | diag_dev |
size_t * | diag_pos_dev |
Template class for Block Triangular Arrowhead Solver.
T | Data type of the matrix elements. Supported choices are double and single precision (some remnants of complex support remain). |
double BTA< T >::BTAdiag | ( | size_t * | ia, |
size_t * | ja, | ||
T * | a, | ||
T * | diag | ||
) |
compute selected inverse. return only the diagonal of the inverse.
[in] | ia | Array of type size_t storing the row pointers of the matrix. |
[in] | ja | Array of type size_t storing the column indices of the matrix. |
[in] | a | Array of type T storing the values of the matrix. |
[in,out] | diag | Array of type T storing the diagonal of the inverse. |
double BTA< T >::BTAselInv | ( | size_t * | ia, |
size_t * | ja, | ||
T * | a, | ||
T * | invQ | ||
) |
compute selected inverse. return all elements of the inverse that were nonzero in Q and are within the diagonal block.
[in] | ia | Array of type size_t storing the row pointers of the matrix. |
[in] | ja | Array of type size_t storing the column indices of the matrix. |
[in] | a | Array of type T storing the values of the matrix. |
[in,out] | invQ | Array of type T storing the selected entries of the inverse (those that were nonzero in Q and lie within the diagonal block). |
double BTA< T >::factorize | ( | size_t * | ia, |
size_t * | ja, | ||
T * | a, | ||
double & | t_firstStageFactor | ||
) |
Perform factorization of sparse matrix in CSC format.
[in] | ia | Array of type size_t storing the row pointers of the matrix. |
[in] | ja | Array of type size_t storing the column indices of the matrix. |
[in] | a | Array of type T storing the values of the matrix. |
[in,out] | t_firstStageFactor | Time taken for the first-stage factorization. |
double BTA< T >::factorize_noCopyHost | ( | size_t * | ia, |
size_t * | ja, | ||
T * | a, | ||
T & | logDet | ||
) |
Perform factorization on the given matrix without copying factor back to host.
[in] | ia | Array of type size_t storing the row pointers of the matrix. |
[in] | ja | Array of type size_t storing the column indices of the matrix. |
[in] | a | Array of type T storing the values of the matrix. |
[in,out] | logDet | Log determinant of the matrix. |
double BTA< T >::factorizeSolve | ( | size_t * | ia, |
size_t * | ja, | ||
T * | a, | ||
T * | x, | ||
T * | rhs, | ||
size_t | nrhs, | ||
double & | t_firstSecondStage, | ||
double & | t_SecondStageBackPass | ||
) |
Perform factorization and solve the linear system for multiple right-hand sides. The forward substitution of the solve is already performed on the GPU while the factorization is being computed.
[in] | ia | Array of type size_t storing the row pointers of the matrix. |
[in] | ja | Array of type size_t storing the column indices of the matrix. |
[in] | a | Array of type T storing the values of the matrix. |
[in,out] | x | Array of type T storing the solutions. |
[in] | rhs | Array of type T storing the right-hand sides. |
[in] | nrhs | Number of right-hand sides. |
[in,out] | t_firstSecondStage | Time taken for factorization & forward solve |
[in,out] | t_SecondStageBackPass | Time taken for the backward pass. |
T BTA< T >::logDet | ( | size_t * | ia, |
size_t * | ja, | ||
T * | a | ||
) |
compute log determinant.
[in] | ia | Array of type size_t storing the row pointers of the matrix. |
[in] | ja | Array of type size_t storing the column indices of the matrix. |
[in] | a | Array of type T storing the values of the matrix. |
double BTA< T >::residualNorm | ( | T * | x, |
T * | b | ||
) |
compute residual norm as || r || = || b - A*x ||.
[in] | x | Array of type T storing the solution vector. |
[in] | b | Array of type T storing the right-hand side. |
double BTA< T >::residualNormNormalized | ( | T * | x, |
T * | b | ||
) |
compute residual norm as || rel r || = || b - A*x || / || b ||.
[in] | x | Array of type T storing the solution vector. |
[in] | b | Array of type T storing the right-hand side. |
double BTA< T >::solve | ( | size_t * | ia, |
size_t * | ja, | ||
T * | a, | ||
T * | x, | ||
T * | b, | ||
size_t | nrhs, | ||
double & | t_secondStageForwardPass, | ||
double & | t_secondStageBackwardPass | ||
) |
Solve the linear system for a single right-hand side.
[in] | ia | Array of type size_t storing the row pointers of the matrix. |
[in] | ja | Array of type size_t storing the column indices of the matrix. |
[in] | a | Array of type T storing the values of the matrix. |
[in,out] | x | Array of type T storing the solution. |
[in] | b | Array of type T storing the right-hand side. |
[in,out] | t_secondStageForwardPass | Time taken for the forward pass. |
[in,out] | t_secondStageBackwardPass | Time taken for the backward pass. |
double BTA< T >::solve | ( | size_t * | ia, |
size_t * | ja, | ||
T * | a, | ||
T * | rhs, | ||
size_t | nrhs, | ||
double & | t_secondStageForwardPass, | ||
double & | t_secondStageBackwardPass | ||
) |
Solve the linear system when the Cholesky factor is already computed.
[in] | ia | Array of type size_t storing the row pointers of the matrix. |
[in] | ja | Array of type size_t storing the column indices of the matrix. |
[in] | a | Array of type T storing the values of the matrix. |
[in] | rhs | rhs, gets overwritten by solution. |
[in] | nrhs | Number of right-hand sides. |
[in,out] | t_secondStageForwardPass | Time taken for the forward pass. |
[in,out] | t_secondStageBackwardPass | Time taken for the backward pass. |
double BTA< T >::solve_d | ( | size_t * | ia, |
size_t * | ja, | ||
float * | a, | ||
float * | x, | ||
float * | b, | ||
size_t | nrhs | ||
) |
double precision solve but assuming single precision input.
[in] | ia | Array of type size_t storing the row pointers of the matrix. |
[in] | ja | Array of type size_t storing the column indices of the matrix. |
[in] | a | Array of type T storing the values of the matrix (single precision). |
[in,out] | x | Array of type T storing the solution. |
[in] | b | Array of type float storing the right-hand side (single precision). |
[in] | nrhs | Number of right-hand sides. |
double BTA< T >::solve_s | ( | size_t * | ia, |
size_t * | ja, | ||
double * | a, | ||
double * | x, | ||
double * | rhs, | ||
size_t | nrhs | ||
) |
single precision solve but assuming double precision input.
[in] | ia | Array of type size_t storing the row pointers of the matrix. |
[in] | ja | Array of type size_t storing the column indices of the matrix. |
[in] | a | Array of type T storing the values of the matrix (double precision). |
[in,out] | x | Array of type T storing the solution. |
[in] | rhs | Array of type double storing the right-hand side (double precision). |
[in] | nrhs | Number of right-hand sides. |
|
private |
GPU rank of the current MPI process.
|
private |
array of type T storing the values of the sparse input matrix.
|
private |
array of type size_t storing the row pointers of the sparse input matrix.
|
private |
array of type size_t storing the column indices of the sparse input matrix.
|
private |
number of nonzeros in the block-formatted matrix (not in the sparse input matrix).
|
private |
number of dense rows in the arrowhead.
|
private |
size of diagonal blocks, i.e. number of spatial nodes.
|
private |
number of large diagonal blocks, i.e. number of time steps.
|
private |
dimension of the matrix.