|
INLA_DIST
|
Template class for Block Triangular Arrowhead Solver. More...
#include <BTA.H>
Public Member Functions | |
| BTA (size_t ns, size_t nt, size_t nd, int GPU_rank_) | |
| Constructor for BTA Solver. | |
| ~BTA () | |
| Destructor for BTA class. | |
| double | factorize (size_t *ia, size_t *ja, T *a, double &t_firstStageFactor) |
| Perform factorization of sparse matrix in CSC format. | |
| double | factorize_noCopyHost (size_t *ia, size_t *ja, T *a, T &logDet) |
| Perform factorization on the given matrix without copying factor back to host. | |
| double | factorizeSolve (size_t *ia, size_t *ja, T *a, T *x, T *rhs, size_t nrhs, double &t_firstSecondStage, double &t_SecondStageBackPass) |
| Perform factorization and solve the linear system for multiple right-hand sides. The forward substitution of the solve is already performed on the GPU while the factorization is being computed. | |
| double | solve (size_t *ia, size_t *ja, T *a, T *rhs, size_t nrhs, double &t_secondStageForwardPass, double &t_secondStageBackwardPass) |
| Solve the linear system when cholesky factor is already computed. | |
| double | solve (size_t *, size_t *, T *, T *, T *, size_t, double &t_secondStageForwardPass, double &t_secondStageBackwardPass) |
| Solve the linear system for a single right-hand side. | |
| double | solve_s (size_t *ia, size_t *ja, double *a, double *x, double *rhs, size_t nrhs) |
| single precision solve but assuming double precision input. | |
| double | solve_d (size_t *, size_t *, float *, float *, float *, size_t) |
| double precision solve but assuming single precision input. | |
| double | BTAdiag (size_t *ia, size_t *ja, T *a, T *diag) |
| compute selected inverse. return only the diagonal of the inverse. | |
| double | BTAinvBlks (size_t *, size_t *, T *, T *) |
| double | BTAselInv (size_t *ia, size_t *ja, T *a, T *invQ) |
| compute selected inverse. return all elements of the inverse that were nonzero in Q and are within the diagonal block. | |
| T | logDet (size_t *ia, size_t *ja, T *a) |
| compute log determinant. | |
| double | residualNorm (T *x, T *b) |
| compute residual norm as || r || = || b - A*x ||. | |
| double | residualNormNormalized (T *x, T *b) |
| compute residual norm as || rel r || = || b - A*x || / || b ||. | |
| double | flop_count_factorise () |
Private Member Functions | |
| size_t | mf_block_index (size_t, size_t) |
| size_t | mf_block_lda (size_t, size_t) |
| size_t | mf_dense_block_index (size_t) |
| size_t | mf_dense_block_offset (size_t) |
| size_t | mf_dense_block_lda (size_t) |
| size_t | invblks_diag_block_index (size_t) |
| size_t | invblks_dense_block_index (size_t) |
| double | FirstStageFactor () |
| double | FirstSecondStageFactor (size_t nhrs) |
| double | FirstStageFactor_noCopyHost (T &logDet) |
| double | FirstStageFactor_noCopyHost_testV (double &logDet) |
| double | ForwardPassSolve (size_t) |
| double | BackwardPassSolve (size_t) |
| double | SecondStageSolve (size_t, double &t_secondStageForwardPass, double &t_secondStageBackwardPass) |
| double | SecondStageSolve_s (size_t, float *rhs_s) |
| double | SecondStageSolve_d (size_t, double *rhs_d) |
| double | ThirdStageBTA (T *, T *, int) |
| void | initialize_MF_host () |
| void | initialize_invBlks_host () |
| void | get_max_supernode_nnz () |
| void | init_supernode (T *M_dev, size_t supernode, cudaStream_t stream) |
| void | copy_supernode_to_host (T *M_dev, size_t supernode, cudaStream_t stream) |
| void | extract_nnzA (T *M_dev, size_t supernode) |
| void | copy_supernode_to_host_write (T *M_dev, size_t supernode) |
| void | copy_supernode_to_device (T *M_dev, size_t supernode, cudaStream_t stream) |
| void | copy_supernode_diag (T *src, size_t supernode) |
| void | swap_pointers (T **ptr1, T **ptr2) |
| T | f_one () |
| T | f_zero () |
| CPX | f_one () |
| float | f_one () |
| CPX | f_zero () |
| float | f_zero () |
Private Attributes | |
| size_t * | matrix_ia |
| size_t * | matrix_ja |
| T * | matrix_a |
| size_t | matrix_size |
| size_t | matrix_n_nonzeros |
| size_t | matrix_ns |
| size_t | matrix_nt |
| size_t | matrix_nd |
| size_t * | Bmin |
| size_t * | Bmax |
| size_t | NBlock |
| size_t * | diag_pos |
| size_t | max_supernode_nnz = 0 |
| size_t | ind_invBlks_fi |
| size_t | mem_alloc_dev = 0 |
| int | GPU_rank |
| bool | MF_allocated = false |
| bool | invBlks_allocated = false |
| bool | MF_dev_allocated = false |
| bool | factorization_completed = false |
| int | cpy_indicator |
| magma_queue_t | magma_queue_1 |
| magma_queue_t | magma_queue_2 |
| cudaStream_t | stream_c |
| cudaStream_t | copyStream = NULL |
| cudaStream_t | magma_cudaStream_1 = NULL |
| cudaStream_t | magma_cudaStream_2 = NULL |
| cudaEvent_t | initBlock_dev_ev |
| cudaEvent_t | potrf_dev_ev |
| magma_event_t | potrf_dev_magma_ev |
| void * | cublas_handle |
| int * | info_cuda = NULL |
| int * | cuda_buffer_flag_potrf |
| int * | cuda_buffer_flag_trtri |
| cusolverDnHandle_t * | handle |
| cusolverDnParams_t * | params |
| size_t * | dev_size |
| size_t * | host_size |
| double * | mem_cuda_dev |
| double * | mem_cuda_host |
| T * | MF |
| T * | invBlks |
| T * | inv_a |
| T * | blockR_dev |
| T * | blockM_dev |
| T * | blockDense_dev |
| T * | rhs |
| T * | rhs_dev |
| size_t * | ia_dev |
| size_t * | ja_dev |
| T * | a_dev |
| size_t * | inv_ia_dev |
| size_t * | inv_ja_dev |
| T * | inv_a_dev |
| T * | diag_dev |
| size_t * | diag_pos_dev |
Template class for Block Triangular Arrowhead Solver.
| T | Data type of the matrix elements. Choices are double, single (and some remnants of complex support) |
| double BTA< T >::BTAdiag | ( | size_t * | ia, |
| size_t * | ja, | ||
| T * | a, | ||
| T * | diag | ||
| ) |
compute selected inverse. return only the diagonal of the inverse.
| [in] | ia | Array of type size_t storing the row pointers of the matrix. |
| [in] | ja | Array of type size_t storing the column indices of the matrix. |
| [in] | a | Array of type T storing the values of the matrix. |
| [in,out] | diag | Array of type T storing the diagonal of the inverse. |
| double BTA< T >::BTAselInv | ( | size_t * | ia, |
| size_t * | ja, | ||
| T * | a, | ||
| T * | invQ | ||
| ) |
compute selected inverse. return all elements of the inverse that were nonzero in Q and are within the diagonal block.
| [in] | ia | Array of type size_t storing the row pointers of the matrix. |
| [in] | ja | Array of type size_t storing the column indices of the matrix. |
| [in] | a | Array of type T storing the values of the matrix. |
| [in,out] | invQ | Array of type T storing the selected entries of the inverse (those that were nonzero in Q and are within the diagonal block). |
| double BTA< T >::factorize | ( | size_t * | ia, |
| size_t * | ja, | ||
| T * | a, | ||
| double & | t_firstStageFactor | ||
| ) |
Perform factorization of sparse matrix in CSC format.
| [in] | ia | Array of type size_t storing the row pointers of the matrix. |
| [in] | ja | Array of type size_t storing the column indices of the matrix. |
| [in] | a | Array of type T storing the values of the matrix. |
| [in,out] | t_firstStageFactor | Time taken for the first-stage factorization. |
| double BTA< T >::factorize_noCopyHost | ( | size_t * | ia, |
| size_t * | ja, | ||
| T * | a, | ||
| T & | logDet | ||
| ) |
Perform factorization on the given matrix without copying factor back to host.
| [in] | ia | Array of type size_t storing the row pointers of the matrix. |
| [in] | ja | Array of type size_t storing the column indices of the matrix. |
| [in] | a | Array of type T storing the values of the matrix. |
| [in,out] | logDet | Log determinant of the matrix. |
| double BTA< T >::factorizeSolve | ( | size_t * | ia, |
| size_t * | ja, | ||
| T * | a, | ||
| T * | x, | ||
| T * | rhs, | ||
| size_t | nrhs, | ||
| double & | t_firstSecondStage, | ||
| double & | t_SecondStageBackPass | ||
| ) |
Perform factorization and solve the linear system for multiple right-hand sides. The forward substitution of the solve is already performed on the GPU while the factorization is being computed.
| [in] | ia | Array of type size_t storing the row pointers of the matrix. |
| [in] | ja | Array of type size_t storing the column indices of the matrix. |
| [in] | a | Array of type T storing the values of the matrix. |
| [in,out] | x | Array of type T storing the solutions. |
| [in] | rhs | Array of type T storing the right-hand sides. |
| [in] | nrhs | Number of right-hand sides. |
| [in,out] | t_firstSecondStage | Time taken for factorization & forward solve |
| [in,out] | t_SecondStageBackPass | Time taken for the backward pass. |
| T BTA< T >::logDet | ( | size_t * | ia, |
| size_t * | ja, | ||
| T * | a | ||
| ) |
compute log determinant.
| [in] | ia | Array of type size_t storing the row pointers of the matrix. |
| [in] | ja | Array of type size_t storing the column indices of the matrix. |
| [in] | a | Array of type T storing the values of the matrix. |
| double BTA< T >::residualNorm | ( | T * | x, |
| T * | b | ||
| ) |
compute residual norm as || r || = || b - A*x ||.
| [in] | x | Array of type T storing the solution vector x. |
| [in] | b | Array of type T storing the right-hand side b. |
| double BTA< T >::residualNormNormalized | ( | T * | x, |
| T * | b | ||
| ) |
compute residual norm as || rel r || = || b - A*x || / || b ||.
| [in] | x | Array of type T storing the solution vector x. |
| [in] | b | Array of type T storing the right-hand side b. |
| double BTA< T >::solve | ( | size_t * | ia, |
| size_t * | ja, | ||
| T * | a, | ||
| T * | x, | ||
| T * | b, | ||
| size_t | nrhs, | ||
| double & | t_secondStageForwardPass, | ||
| double & | t_secondStageBackwardPass | ||
| ) |
Solve the linear system for a single right-hand side.
| [in] | ia | Array of type size_t storing the row pointers of the matrix. |
| [in] | ja | Array of type size_t storing the column indices of the matrix. |
| [in] | a | Array of type T storing the values of the matrix. |
| [in,out] | x | Array of type T storing the solution. |
| [in] | b | Array of type T storing the right-hand side. |
| [in,out] | t_secondStageForwardPass | Time taken for the forward pass. |
| [in,out] | t_secondStageBackwardPass | Time taken for the backward pass. |
| double BTA< T >::solve | ( | size_t * | ia, |
| size_t * | ja, | ||
| T * | a, | ||
| T * | rhs, | ||
| size_t | nrhs, | ||
| double & | t_secondStageForwardPass, | ||
| double & | t_secondStageBackwardPass | ||
| ) |
Solve the linear system when cholesky factor is already computed.
| [in] | ia | Array of type size_t storing the row pointers of the matrix. |
| [in] | ja | Array of type size_t storing the column indices of the matrix. |
| [in] | a | Array of type T storing the values of the matrix. |
| [in,out] | rhs | Array of type T storing the right-hand sides; overwritten by the solution. |
| [in] | nrhs | Number of right-hand sides. |
| [in,out] | t_secondStageForwardPass | Time taken for the forward pass. |
| [in,out] | t_secondStageBackwardPass | Time taken for the backward pass. |
| double BTA< T >::solve_d | ( | size_t * | ia, |
| size_t * | ja, | ||
| float * | a, | ||
| float * | x, | ||
| float * | b, | ||
| size_t | nrhs | ||
| ) |
double precision solve but assuming single precision input.
| [in] | ia | Array of type size_t storing the row pointers of the matrix. |
| [in] | ja | Array of type size_t storing the column indices of the matrix. |
| [in] | a | Array of type T storing the values of the matrix (single precision). |
| [in,out] | x | Array of type T storing the solution. |
| [in] | b | Array of type float storing the right-hand side (single precision). |
| [in] | nrhs | Number of right-hand sides. |
| double BTA< T >::solve_s | ( | size_t * | ia, |
| size_t * | ja, | ||
| double * | a, | ||
| double * | x, | ||
| double * | rhs, | ||
| size_t | nrhs | ||
| ) |
single precision solve but assuming double precision input.
| [in] | ia | Array of type size_t storing the row pointers of the matrix. |
| [in] | ja | Array of type size_t storing the column indices of the matrix. |
| [in] | a | Array of type T storing the values of the matrix (double precision). |
| [in,out] | x | Array of type T storing the solution. |
| [in] | rhs | Array of type double storing the right-hand side (double precision). |
| [in] | nrhs | Number of right-hand sides. |
|
private |
GPU rank of the current MPI process.
|
private |
array of type T storing the values of the sparse input matrix.
|
private |
array of type size_t storing the row pointers of the sparse input matrix.
|
private |
array of type size_t storing the column indices of the sparse input matrix.
|
private |
number of nonzeros in the block formatted matrix. NOT sparse input.
|
private |
number of dense rows in the arrowhead.
|
private |
size of diagonal blocks, i.e. number of spatial nodes.
|
private |
number of large diagonal blocks, i.e. number of time steps.
|
private |
dimension of the matrix.