Template class for Block Triangular Arrowhead Solver. More...

#include <BTA.H>

Public Member Functions
	BTA (size_t ns, size_t nt, size_t nd, int GPU_rank_)
	Constructor for BTA Solver.

	~BTA ()
	Destructor for BTA class.

double	factorize (size_t *ia, size_t *ja, T *a, double &t_firstStageFactor)
	Perform factorization of sparse matrix in CSC format.

double	factorize_noCopyHost (size_t *ia, size_t *ja, T *a, T &logDet)
	Perform factorization on the given matrix without copying factor back to host.

double	factorizeSolve (size_t *ia, size_t *ja, T *a, T *x, T *rhs, size_t nrhs, double &t_firstSecondStage, double &t_SecondStageBackPass)
	Perform factorization and solve the linear system for multiple right-hand sides. forward substitution of solve already happens on GPU, while the factorization is being computed.

double	solve (size_t *ia, size_t *ja, T *a, T *rhs, size_t nrhs, double &t_secondStageForwardPass, double &t_secondStageBackwardPass)
	Solve the linear system when cholesky factor is already computed.

double	solve (size_t *, size_t *, T *, T *, T *, size_t, double &t_secondStageForwardPass, double &t_secondStageBackwardPass)
	Solve the linear system for a single right-hand side.

double	solve_s (size_t *ia, size_t *ja, double *a, double *x, double *rhs, size_t nrhs)
	single precision solve but assuming double precision input.

double	solve_d (size_t *, size_t *, float *, float *, float *, size_t)
	double precision solve but assuming single precision input.

double	BTAdiag (size_t *ia, size_t *ja, T *a, T *diag)
	compute selected inverse. return only the diagonal of the inverse.

double	BTAinvBlks (size_t *, size_t *, T *, T *)

double	BTAselInv (size_t *ia, size_t *ja, T *a, T *invQ)
	compute selected inverse. return all elements of the inverse that were nonzero in Q and are within the diagonal block.

T	logDet (size_t *ia, size_t *ja, T *a)
	compute log determinant.

double	residualNorm (T *x, T *b)
	compute residual norm as || r || = || b - A*x ||.

double	residualNormNormalized (T *x, T *b)
	compute residual norm as || rel r || = || b - A*x || / || b ||.

double	flop_count_factorise ()

Private Member Functions
size_t	mf_block_index (size_t, size_t)

size_t	mf_block_lda (size_t, size_t)

size_t	mf_dense_block_index (size_t)

size_t	mf_dense_block_offset (size_t)

size_t	mf_dense_block_lda (size_t)

size_t	invblks_diag_block_index (size_t)

size_t	invblks_dense_block_index (size_t)

double	FirstStageFactor ()

double	FirstSecondStageFactor (size_t nhrs)

double	FirstStageFactor_noCopyHost (T &logDet)

double	FirstStageFactor_noCopyHost_testV (double &logDet)

double	ForwardPassSolve (size_t)

double	BackwardPassSolve (size_t)

double	SecondStageSolve (size_t, double &t_secondStageForwardPass, double &t_secondStageBackwardPass)

double	SecondStageSolve_s (size_t, float *rhs_s)

double	SecondStageSolve_d (size_t, double *rhs_d)

double	ThirdStageBTA (T *, T *, int)

void	initialize_MF_host ()

void	initialize_invBlks_host ()

void	get_max_supernode_nnz ()

void	init_supernode (T *M_dev, size_t supernode, cudaStream_t stream)

void	copy_supernode_to_host (T *M_dev, size_t supernode, cudaStream_t stream)

void	extract_nnzA (T *M_dev, size_t supernode)

void	copy_supernode_to_host_write (T *M_dev, size_t supernode)

void	copy_supernode_to_device (T *M_dev, size_t supernode, cudaStream_t stream)

void	copy_supernode_diag (T *src, size_t supernode)

void	swap_pointers (T ptr1, T ptr2)

T	f_one ()

T	f_zero ()

CPX	f_one ()

float	f_one ()

CPX	f_zero ()

float	f_zero ()

Private Attributes
size_t *	matrix_ia

size_t *	matrix_ja

T *	matrix_a

size_t	matrix_size

size_t	matrix_n_nonzeros

size_t	matrix_ns

size_t	matrix_nt

size_t	matrix_nd

size_t *	Bmin

size_t *	Bmax

size_t	NBlock

size_t *	diag_pos

size_t	max_supernode_nnz = 0

size_t	ind_invBlks_fi

size_t	mem_alloc_dev = 0

int	GPU_rank

bool	MF_allocated = false

bool	invBlks_allocated = false

bool	MF_dev_allocated = false

bool	factorization_completed = false

int	cpy_indicator

magma_queue_t	magma_queue_1

magma_queue_t	magma_queue_2

cudaStream_t	stream_c

cudaStream_t	copyStream = NULL

cudaStream_t	magma_cudaStream_1 = NULL

cudaStream_t	magma_cudaStream_2 = NULL

cudaEvent_t	initBlock_dev_ev

cudaEvent_t	potrf_dev_ev

magma_event_t	potrf_dev_magma_ev

void *	cublas_handle

int *	info_cuda = NULL

int *	cuda_buffer_flag_potrf

int *	cuda_buffer_flag_trtri

cusolverDnHandle_t *	handle

cusolverDnParams_t *	params

size_t *	dev_size

size_t *	host_size

double *	mem_cuda_dev

double *	mem_cuda_host

T *	MF

T *	invBlks

T *	inv_a

T *	blockR_dev

T *	blockM_dev

T *	blockDense_dev

T *	rhs

T *	rhs_dev

size_t *	ia_dev

size_t *	ja_dev

T *	a_dev

size_t *	inv_ia_dev

size_t *	inv_ja_dev

T *	inv_a_dev

T *	diag_dev

size_t *	diag_pos_dev

Detailed Description

template<class T>
class BTA< T >

Template class for Block Triangular Arrowhead Solver.

Template Parameters

T	Data type of the matrix elements. Supported choices are double and single precision (some remnants of complex support also remain).

Constructor & Destructor Documentation

◆ BTA()

template<class T >

BTA< T >::BTA	(	size_t	ns,
		size_t	nt,
		size_t	nd,
		int	GPU_rank_
	)

Constructor for BTA Solver.

Parameters

ns	number of spatial nodes.
nt	number of timesteps.
nd	number of fixed effects.
GPU_rank_	GPU rank.

Member Function Documentation

◆ BTAdiag()

template<class T >

double BTA< T >::BTAdiag	(	size_t *	ia,
		size_t *	ja,
		T *	a,
		T *	diag
	)

compute selected inverse. return only the diagonal of the inverse.

Parameters

[in]	ia	Array of type size_t storing the row pointers of the matrix.
[in]	ja	Array of type size_t storing the column indices of the matrix.
[in]	a	Array of type T storing the values of the matrix.
[in,out]	diag	Array of type T storing the diagonal of the inverse.

Returns: number of GFLOP/S.

◆ BTAselInv()

template<class T >

double BTA< T >::BTAselInv	(	size_t *	ia,
		size_t *	ja,
		T *	a,
		T *	invQ
	)

compute selected inverse. return all elements of the inverse that were nonzero in Q and are within the diagonal block.

Parameters

[in]	ia	Array of type size_t storing the row pointers of the matrix.
[in]	ja	Array of type size_t storing the column indices of the matrix.
[in]	a	Array of type T storing the values of the matrix.
[in,out]	invQ	Array of type T storing the selected entries of the inverse (those that were nonzero in Q and lie within the diagonal block).

Returns: number of GFLOP/S.

◆ factorize()

template<class T >

double BTA< T >::factorize	(	size_t *	ia,
		size_t *	ja,
		T *	a,
		double &	t_firstStageFactor
	)

Perform factorization of sparse matrix in CSC format.

Parameters

[in]	ia	Array of type size_t storing the row pointers of the matrix.
[in]	ja	Array of type size_t storing the column indices of the matrix.
[in]	a	Array of type T storing the values of the matrix.
[in,out]	t_firstStageFactor	Time taken for the first-stage factorization.

Returns: number of GFLOP/S.

◆ factorize_noCopyHost()

template<class T >

double BTA< T >::factorize_noCopyHost	(	size_t *	ia,
		size_t *	ja,
		T *	a,
		T &	logDet
	)

Perform factorization on the given matrix without copying factor back to host.

Parameters

[in]	ia	Array of type size_t storing the row pointers of the matrix.
[in]	ja	Array of type size_t storing the column indices of the matrix.
[in]	a	Array of type T storing the values of the matrix.
[in,out]	logDet	Log determinant of the matrix.

Returns: number of GFLOP/S.

◆ factorizeSolve()

template<class T >

double BTA< T >::factorizeSolve	(	size_t *	ia,
		size_t *	ja,
		T *	a,
		T *	x,
		T *	rhs,
		size_t	nrhs,
		double &	t_firstSecondStage,
		double &	t_SecondStageBackPass
	)

Perform factorization and solve the linear system for multiple right-hand sides. forward substitution of solve already happens on GPU, while the factorization is being computed.

Parameters

[in]	ia	Array of type size_t storing the row pointers of the matrix.
[in]	ja	Array of type size_t storing the column indices of the matrix.
[in]	a	Array of type T storing the values of the matrix.
[in,out]	x	Array of type T storing the solutions.
[in]	rhs	Array of type T storing the right-hand sides.
[in]	nrhs	Number of right-hand sides.
[in,out]	t_firstSecondStage	Time taken for factorization & forward solve
[in,out]	t_SecondStageBackPass	Time taken for the backward pass.

Returns: number of GFLOP/S.

◆ logDet()

template<class T >

T BTA< T >::logDet	(	size_t *	ia,
		size_t *	ja,
		T *	a
	)

compute log determinant.

Parameters

[in]	ia	Array of type size_t storing the row pointers of the matrix.
[in]	ja	Array of type size_t storing the column indices of the matrix.
[in]	a	Array of type T storing the values of the matrix.

Returns: log determinant.

◆ residualNorm()

template<class T >

double BTA< T >::residualNorm	(	T *	x,
		T *	b
	)

compute residual norm as || r || = || b - A*x ||.

Parameters

[in]	x	Array of type T storing the solution vector x.
[in]	b	Array of type T storing the right-hand side vector b.

Returns: || r || = || b - A*x ||.

◆ residualNormNormalized()

template<class T >

double BTA< T >::residualNormNormalized	(	T *	x,
		T *	b
	)

compute residual norm as || rel r || = || b - A*x || / || b ||.

Parameters

[in]	x	Array of type T storing the solution vector x.
[in]	b	Array of type T storing the right-hand side vector b.

Returns: || rel r|| = || b - A*x || / || b || .

◆ solve() [1/2]

template<class T >

double BTA< T >::solve	(	size_t *	ia,
		size_t *	ja,
		T *	a,
		T *	x,
		T *	b,
		size_t	nrhs,
		double &	t_secondStageForwardPass,
		double &	t_secondStageBackwardPass
	)

Solve the linear system for a single right-hand side.

Parameters

[in]	ia	Array of type size_t storing the row pointers of the matrix.
[in]	ja	Array of type size_t storing the column indices of the matrix.
[in]	a	Array of type T storing the values of the matrix.
[in,out]	x	Array of type T storing the solution.
[in]	b	Array of type T storing the right-hand side.
[in]	nrhs	Number of right-hand sides.
[in,out]	t_secondStageForwardPass	Time taken for the forward pass.
[in,out]	t_secondStageBackwardPass	Time taken for the backward pass.

Returns: number of GFLOP/S.

◆ solve() [2/2]

template<class T >

double BTA< T >::solve	(	size_t *	ia,
		size_t *	ja,
		T *	a,
		T *	rhs,
		size_t	nrhs,
		double &	t_secondStageForwardPass,
		double &	t_secondStageBackwardPass
	)

Solve the linear system when cholesky factor is already computed.

Parameters

[in]	ia	Array of type size_t storing the row pointers of the matrix.
[in]	ja	Array of type size_t storing the column indices of the matrix.
[in]	a	Array of type T storing the values of the matrix.
[in,out]	rhs	Array of type T storing the right-hand sides; gets overwritten by the solution.
[in]	nrhs	Number of right-hand sides.
[in,out]	t_secondStageForwardPass	Time taken for the forward pass.
[in,out]	t_secondStageBackwardPass	Time taken for the backward pass.

Returns: number of GFLOP/S.

◆ solve_d()

template<class T >

double BTA< T >::solve_d	(	size_t *	ia,
		size_t *	ja,
		float *	a,
		float *	x,
		float *	b,
		size_t	nrhs
	)

double precision solve but assuming single precision input.

Parameters

[in]	ia	Array of type size_t storing the row pointers of the matrix.
[in]	ja	Array of type size_t storing the column indices of the matrix.
[in]	a	Array of type float storing the values of the matrix (single precision).
[in,out]	x	Array of type float storing the solution.
[in]	b	Array of type float storing the right-hand side (single precision).
[in]	nrhs	Number of right-hand sides.

Returns: number of GFLOP/S.

◆ solve_s()

template<class T >

double BTA< T >::solve_s	(	size_t *	ia,
		size_t *	ja,
		double *	a,
		double *	x,
		double *	rhs,
		size_t	nrhs
	)

single precision solve but assuming double precision input.

Parameters

[in]	ia	Array of type size_t storing the row pointers of the matrix.
[in]	ja	Array of type size_t storing the column indices of the matrix.
[in]	a	Array of type double storing the values of the matrix (double precision).
[in,out]	x	Array of type double storing the solution.
[in]	rhs	Array of type double storing the right-hand side (double precision).
[in]	nrhs	Number of right-hand sides.

Returns: number of GFLOP/S.

dimension of the matrix.

The documentation for this class was generated from the following file:

BTA/BTA.H

Public Member Functions

Private Member Functions

Private Attributes

Detailed Description

Constructor & Destructor Documentation

◆ BTA()

Member Function Documentation

◆ BTAdiag()

◆ BTAselInv()

◆ factorize()

◆ factorize_noCopyHost()

◆ factorizeSolve()

◆ logDet()

◆ residualNorm()

◆ residualNormNormalized()

◆ solve() [1/2]

◆ solve() [2/2]

◆ solve_d()

◆ solve_s()

Member Data Documentation

◆ GPU_rank

◆ matrix_a

◆ matrix_ia

◆ matrix_ja

◆ matrix_n_nonzeros

◆ matrix_nd

◆ matrix_ns

◆ matrix_nt

◆ matrix_size