![]() |
Reference documentation for deal.II version 9.1.0-pre
|
#include <deal.II/lac/scalapack.h>
Public Types | |
using | size_type = unsigned int |
Public Member Functions | |
ScaLAPACKMatrix (const size_type n_rows, const size_type n_columns, const std::shared_ptr< const Utilities::MPI::ProcessGrid > &process_grid, const size_type row_block_size=32, const size_type column_block_size=32, const LAPACKSupport::Property property=LAPACKSupport::Property::general) | |
ScaLAPACKMatrix (const size_type size, const std::shared_ptr< const Utilities::MPI::ProcessGrid > process_grid, const size_type block_size=32, const LAPACKSupport::Property property=LAPACKSupport::Property::symmetric) | |
~ScaLAPACKMatrix () override=default | |
void | reinit (const size_type n_rows, const size_type n_columns, const std::shared_ptr< const Utilities::MPI::ProcessGrid > &process_grid, const size_type row_block_size=32, const size_type column_block_size=32, const LAPACKSupport::Property property=LAPACKSupport::Property::general) |
void | reinit (const size_type size, const std::shared_ptr< const Utilities::MPI::ProcessGrid > process_grid, const size_type block_size=32, const LAPACKSupport::Property property=LAPACKSupport::Property::symmetric) |
void | set_property (const LAPACKSupport::Property property) |
LAPACKSupport::Property | get_property () const |
LAPACKSupport::State | get_state () const |
ScaLAPACKMatrix< NumberType > & | operator= (const FullMatrix< NumberType > &) |
void | copy_to (FullMatrix< NumberType > &matrix) const |
void | copy_to (ScaLAPACKMatrix< NumberType > &dest) const |
void | copy_to (ScaLAPACKMatrix< NumberType > &B, const std::pair< unsigned int, unsigned int > &offset_A, const std::pair< unsigned int, unsigned int > &offset_B, const std::pair< unsigned int, unsigned int > &submatrix_size) const |
void | copy_transposed (const ScaLAPACKMatrix< NumberType > &B) |
void | add (const ScaLAPACKMatrix< NumberType > &B, const NumberType a=0., const NumberType b=1., const bool transpose_B=false) |
void | add (const NumberType b, const ScaLAPACKMatrix< NumberType > &B) |
void | Tadd (const NumberType b, const ScaLAPACKMatrix< NumberType > &B) |
void | mult (const NumberType b, const ScaLAPACKMatrix< NumberType > &B, const NumberType c, ScaLAPACKMatrix< NumberType > &C, const bool transpose_A=false, const bool transpose_B=false) const |
void | mmult (ScaLAPACKMatrix< NumberType > &C, const ScaLAPACKMatrix< NumberType > &B, const bool adding=false) const |
void | Tmmult (ScaLAPACKMatrix< NumberType > &C, const ScaLAPACKMatrix< NumberType > &B, const bool adding=false) const |
void | mTmult (ScaLAPACKMatrix< NumberType > &C, const ScaLAPACKMatrix< NumberType > &B, const bool adding=false) const |
void | TmTmult (ScaLAPACKMatrix< NumberType > &C, const ScaLAPACKMatrix< NumberType > &B, const bool adding=false) const |
void | save (const char *filename, const std::pair< unsigned int, unsigned int > &chunk_size=std::make_pair(numbers::invalid_unsigned_int, numbers::invalid_unsigned_int)) const |
void | load (const char *filename) |
void | compute_cholesky_factorization () |
void | compute_lu_factorization () |
void | invert () |
std::vector< NumberType > | eigenpairs_symmetric_by_index (const std::pair< unsigned int, unsigned int > &index_limits, const bool compute_eigenvectors) |
std::vector< NumberType > | eigenpairs_symmetric_by_value (const std::pair< NumberType, NumberType > &value_limits, const bool compute_eigenvectors) |
std::vector< NumberType > | eigenpairs_symmetric_by_index_MRRR (const std::pair< unsigned int, unsigned int > &index_limits, const bool compute_eigenvectors) |
std::vector< NumberType > | eigenpairs_symmetric_by_value_MRRR (const std::pair< NumberType, NumberType > &value_limits, const bool compute_eigenvectors) |
std::vector< NumberType > | compute_SVD (ScaLAPACKMatrix< NumberType > *U=nullptr, ScaLAPACKMatrix< NumberType > *VT=nullptr) |
void | least_squares (ScaLAPACKMatrix< NumberType > &B, const bool transpose=false) |
unsigned int | pseudoinverse (const NumberType ratio) |
NumberType | reciprocal_condition_number (const NumberType a_norm) const |
NumberType | l1_norm () const |
NumberType | linfty_norm () const |
NumberType | frobenius_norm () const |
size_type | m () const |
size_type | n () const |
unsigned int | local_m () const |
unsigned int | local_n () const |
unsigned int | global_row (const unsigned int loc_row) const |
unsigned int | global_column (const unsigned int loc_column) const |
NumberType | local_el (const unsigned int loc_row, const unsigned int loc_column) const |
NumberType & | local_el (const unsigned int loc_row, const unsigned int loc_column) |
template<class InputVector > | |
void | scale_columns (const InputVector &factors) |
template<class InputVector > | |
void | scale_rows (const InputVector &factors) |
Private Member Functions | |
NumberType | norm_symmetric (const char type) const |
NumberType | norm_general (const char type) const |
std::vector< NumberType > | eigenpairs_symmetric (const bool compute_eigenvectors, const std::pair< unsigned int, unsigned int > &index_limits=std::make_pair(numbers::invalid_unsigned_int, numbers::invalid_unsigned_int), const std::pair< NumberType, NumberType > &value_limits=std::make_pair(std::numeric_limits< NumberType >::quiet_NaN(), std::numeric_limits< NumberType >::quiet_NaN())) |
std::vector< NumberType > | eigenpairs_symmetric_MRRR (const bool compute_eigenvectors, const std::pair< unsigned int, unsigned int > &index_limits=std::make_pair(numbers::invalid_unsigned_int, numbers::invalid_unsigned_int), const std::pair< NumberType, NumberType > &value_limits=std::make_pair(std::numeric_limits< NumberType >::quiet_NaN(), std::numeric_limits< NumberType >::quiet_NaN())) |
Private Attributes | |
LAPACKSupport::State | state |
LAPACKSupport::Property | property |
std::shared_ptr< const Utilities::MPI::ProcessGrid > | grid |
int | n_rows |
int | n_columns |
int | row_block_size |
int | column_block_size |
int | n_local_rows |
int | n_local_columns |
int | descriptor [9] |
std::vector< NumberType > | work |
std::vector< int > | iwork |
std::vector< int > | ipiv |
const char | uplo |
const int | first_process_row |
const int | first_process_column |
const int | submatrix_row |
const int | submatrix_column |
Threads::Mutex | mutex |
Additional Inherited Members | |
![]() | |
using | size_type = typename TableBase< 2, NumberType >::size_type |
using | value_type = typename AlignedVector< NumberType >::value_type |
using | reference = typename AlignedVector< NumberType >::reference |
using | const_reference = typename AlignedVector< NumberType >::const_reference |
using | const_iterator = TransposeTableIterators::Iterator< NumberType, true > |
using | iterator = TransposeTableIterators::Iterator< NumberType, false > |
![]() | |
using | size_type = typename AlignedVector< NumberType >::size_type |
![]() | |
reference | el (const size_type i, const size_type j) |
const_reference | el (const size_type i, const size_type j) const |
TransposeTable ()=default | |
TransposeTable (const size_type size1, const size_type size2) | |
void | reinit (const size_type size1, const size_type size2, const bool omit_default_initialization=false) |
const_reference | operator() (const size_type i, const size_type j) const |
reference | operator() (const size_type i, const size_type j) |
size_type | n_rows () const |
size_type | n_cols () const |
iterator | begin () |
const_iterator | begin () const |
iterator | end () |
const_iterator | end () const |
![]() | |
size_type | position (const TableIndices< N > &indices) const |
AlignedVector< NumberType >::reference | el (const TableIndices< N > &indices) |
AlignedVector< NumberType >::const_reference | el (const TableIndices< N > &indices) const |
TableBase ()=default | |
TableBase (const TableIndices< N > &sizes) | |
TableBase (const TableIndices< N > &sizes, InputIterator entries, const bool C_style_indexing=true) | |
TableBase (const TableBase< N, NumberType > &src) | |
TableBase (const TableBase< N, T2 > &src) | |
TableBase (TableBase< N, NumberType > &&src) noexcept | |
~TableBase () override=default | |
TableBase< N, NumberType > & | operator= (const TableBase< N, NumberType > &src) |
TableBase< N, NumberType > & | operator= (const TableBase< N, T2 > &src) |
TableBase< N, NumberType > & | operator= (TableBase< N, NumberType > &&src) noexcept |
bool | operator== (const TableBase< N, NumberType > &T2) const |
void | reset_values () |
void | reinit (const TableIndices< N > &new_size, const bool omit_default_initialization=false) |
size_type | size (const unsigned int i) const |
const TableIndices< N > & | size () const |
size_type | n_elements () const |
bool | empty () const |
void | fill (InputIterator entries, const bool C_style_indexing=true) |
void | fill (const NumberType &value) |
AlignedVector< NumberType >::reference | operator() (const TableIndices< N > &indices) |
AlignedVector< NumberType >::const_reference | operator() (const TableIndices< N > &indices) const |
void | swap (TableBase< N, NumberType > &v) |
std::size_t | memory_consumption () const |
void | serialize (Archive &ar, const unsigned int version) |
![]() | |
Subscriptor () | |
Subscriptor (const Subscriptor &) | |
Subscriptor (Subscriptor &&) noexcept | |
virtual | ~Subscriptor () |
Subscriptor & | operator= (const Subscriptor &) |
Subscriptor & | operator= (Subscriptor &&) noexcept |
void | subscribe (const char *identifier=nullptr) const |
void | unsubscribe (const char *identifier=nullptr) const |
unsigned int | n_subscriptions () const |
template<typename StreamType > | |
void | list_subscribers (StreamType &stream) const |
void | list_subscribers () const |
template<class Archive > | |
void | serialize (Archive &ar, const unsigned int version) |
![]() | |
static::ExceptionBase & | ExcInUse (int arg1, std::string arg2, std::string arg3) |
static::ExceptionBase & | ExcNoSubscriber (std::string arg1, std::string arg2) |
![]() | |
AlignedVector< NumberType > | values |
TableIndices< N > | table_size |
A wrapper class around ScaLAPACK parallel dense linear algebra.
ScaLAPACK assumes that matrices are distributed according to the block-cyclic decomposition scheme. An \(M\) by \(N\) matrix is first decomposed into \(MB\) by \(NB\) blocks which are then uniformly distributed across the 2D process grid \(p*q \le Np\), where \(p,q\) are grid dimensions and \(Np\) is the total number of processes.
For example, a global real symmetric matrix of size \(9\times 9\) is stored in upper storage mode with block sizes 4 × 4:
may be distributed using the 2x2 process grid:
with the following local arrays:
Note how processes \((0,0)\) and \((1,0)\) of the process grid store an extra column to represent the last column of the original matrix that did not fit the decomposition into \(4\times 4\) sub-blocks.
The choice of the block size is a compromise between a sufficiently large size for efficient local/serial BLAS, but one that is also small enough to achieve good parallel load balance.
Below we show a strong scaling example of ScaLAPACKMatrix::invert() on up to 5 nodes each composed of two Intel Xeon 2660v2 IvyBridge sockets 2.20GHz, 10 cores/socket. Calculations are performed on square processor grids 1x1, 2x2, 3x3, 4x4, 5x5, 6x6, 7x7, 8x8, 9x9, 10x10.
Definition at line 31 of file process_grid.h.
using ScaLAPACKMatrix< NumberType >::size_type = unsigned int |
Declare the type for container size.
Definition at line 113 of file scalapack.h.
ScaLAPACKMatrix< NumberType >::ScaLAPACKMatrix | ( | const size_type | n_rows, |
const size_type | n_columns, | ||
const std::shared_ptr< const Utilities::MPI::ProcessGrid > & | process_grid, | ||
const size_type | row_block_size = 32 , |
||
const size_type | column_block_size = 32 , |
||
const LAPACKSupport::Property | property = LAPACKSupport::Property::general |
||
) |
Constructor for a rectangular matrix with n_rows
and n_cols
and distributed using the grid process_grid
.
Definition at line 79 of file scalapack.cc.
ScaLAPACKMatrix< NumberType >::ScaLAPACKMatrix | ( | const size_type | size, |
const std::shared_ptr< const Utilities::MPI::ProcessGrid > | process_grid, | ||
const size_type | block_size = 32 , |
||
const LAPACKSupport::Property | property = LAPACKSupport::Property::symmetric |
||
) |
Constructor for a square matrix of size size
, and distributed using the process grid in process_grid
.
Definition at line 104 of file scalapack.cc.
|
overridedefault |
Destructor
void ScaLAPACKMatrix< NumberType >::reinit | ( | const size_type | n_rows, |
const size_type | n_columns, | ||
const std::shared_ptr< const Utilities::MPI::ProcessGrid > & | process_grid, | ||
const size_type | row_block_size = 32 , |
||
const size_type | column_block_size = 32 , |
||
const LAPACKSupport::Property | property = LAPACKSupport::Property::general |
||
) |
Initialize the rectangular matrix with n_rows
and n_cols
and distributed using the grid process_grid
.
Definition at line 121 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::reinit | ( | const size_type | size, |
const std::shared_ptr< const Utilities::MPI::ProcessGrid > | process_grid, | ||
const size_type | block_size = 32 , |
||
const LAPACKSupport::Property | property = LAPACKSupport::Property::symmetric |
||
) |
Initialize the square matrix of size size
and distributed using the grid process_grid
.
Definition at line 196 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::set_property | ( | const LAPACKSupport::Property | property | ) |
Assign property
to this matrix.
Definition at line 209 of file scalapack.cc.
LAPACKSupport::Property ScaLAPACKMatrix< NumberType >::get_property | ( | ) | const |
Return current property
of this matrix
Definition at line 219 of file scalapack.cc.
LAPACKSupport::State ScaLAPACKMatrix< NumberType >::get_state | ( | ) | const |
Return current state
of this matrix
Definition at line 228 of file scalapack.cc.
ScaLAPACKMatrix< NumberType > & ScaLAPACKMatrix< NumberType >::operator= | ( | const FullMatrix< NumberType > & | matrix | ) |
Assignment operator from a regular FullMatrix.
Definition at line 237 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::copy_to | ( | FullMatrix< NumberType > & | matrix | ) | const |
Copy the contents of the distributed matrix into matrix
.
Definition at line 303 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::copy_to | ( | ScaLAPACKMatrix< NumberType > & | dest | ) | const |
Copy the contents of the distributed matrix into a differently distributed matrix dest
. The function also works for matrices with different process grids or block-cyclic distributions.
Definition at line 496 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::copy_to | ( | ScaLAPACKMatrix< NumberType > & | B, |
const std::pair< unsigned int, unsigned int > & | offset_A, | ||
const std::pair< unsigned int, unsigned int > & | offset_B, | ||
const std::pair< unsigned int, unsigned int > & | submatrix_size | ||
) | const |
Copy a submatrix (subset) of the distributed matrix A to a submatrix of the distributed matrix B
.
offset_A
with row index=offset_A.first
and column index=offset_A.second
.offset_B
with row index=offset_B.first
and column index=offset_B.second
.submatrix_size
with number of rows=submatrix_size.first
and number of columns=submatrix_size.second
.If it is necessary to copy complete matrices with an identical block-cyclic distribution, use ScaLAPACKMatrix<NumberType>::copy_to(ScaLAPACKMatrix<NumberType> &dest) with only one argument to avoid communication.
The underlying process grids of the matrices A
and B
must have been built with the same MPI communicator.
Definition at line 360 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::copy_transposed | ( | const ScaLAPACKMatrix< NumberType > & | B | ) |
Transposing assignment: \(\mathbf{A} = \mathbf{B}^T\)
The matrices \(\mathbf{A}\) and \(\mathbf{B}\) must have the same process grid.
The following alignment conditions have to be fulfilled: \(MB_A=NB_B\) and \(NB_A=MB_B\).
Definition at line 625 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::add | ( | const ScaLAPACKMatrix< NumberType > & | B, |
const NumberType | a = 0. , |
||
const NumberType | b = 1. , |
||
const bool | transpose_B = false |
||
) |
The operations based on the input parameter transpose_B
and the alignment conditions are summarized in the following table:
transpose_B | Block Sizes | Operation |
---|---|---|
false | \(MB_A=MB_B\) \(NB_A=NB_B\) | \(\mathbf{A} = a \mathbf{A} + b \mathbf{B}\) |
true | \(MB_A=NB_B\) \(NB_A=MB_B\) | \(\mathbf{A} = a \mathbf{A} + b \mathbf{B}^T\) |
The matrices \(\mathbf{A}\) and \(\mathbf{B}\) must have the same process grid.
Definition at line 635 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::add | ( | const NumberType | b, |
const ScaLAPACKMatrix< NumberType > & | B | ||
) |
Matrix-addition: \(\mathbf{A} = \mathbf{A} + b\, \mathbf{B}\)
The matrices \(\mathbf{A}\) and \(\mathbf{B}\) must have the same process grid.
The following alignment conditions have to be fulfilled: \(MB_A=MB_B\) and \(NB_A=NB_B\).
Definition at line 690 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::Tadd | ( | const NumberType | b, |
const ScaLAPACKMatrix< NumberType > & | B | ||
) |
Matrix-addition: \(\mathbf{A} = \mathbf{A} + b\, \mathbf{B}^T\)
The matrices \(\mathbf{A}\) and \(\mathbf{B}\) must have the same process grid.
The following alignment conditions have to be fulfilled: \(MB_A=NB_B\) and \(NB_A=MB_B\).
Definition at line 700 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::mult | ( | const NumberType | b, |
const ScaLAPACKMatrix< NumberType > & | B, | ||
const NumberType | c, | ||
ScaLAPACKMatrix< NumberType > & | C, | ||
const bool | transpose_A = false , |
||
const bool | transpose_B = false |
||
) | const |
Matrix-matrix-multiplication:
The operations based on the input parameters and the alignment conditions are summarized in the following table:
transpose_A | transpose_B | Block Sizes | Operation |
---|---|---|---|
false | false | \(MB_A=MB_C\) \(NB_A=MB_B\) \(NB_B=NB_C\) | \(\mathbf{C} = b \mathbf{A} \cdot \mathbf{B} + c \mathbf{C}\) |
false | true | \(MB_A=MB_C\) \(NB_A=NB_B\) \(MB_B=NB_C\) | \(\mathbf{C} = b \mathbf{A} \cdot \mathbf{B}^T + c \mathbf{C}\) |
true | false | \(MB_A=MB_B\) \(NB_A=MB_C\) \(NB_B=NB_C\) | \(\mathbf{C} = b \mathbf{A}^T \cdot \mathbf{B} + c \mathbf{C}\) |
true | true | \(MB_A=NB_B\) \(NB_A=MB_C\) \(MB_B=NB_C\) | \(\mathbf{C} = b \mathbf{A}^T \cdot \mathbf{B}^T + c \mathbf{C}\) |
It is assumed that \(\mathbf{A}\) and \(\mathbf{B}\) have compatible sizes and that \(\mathbf{C}\) already has the right size.
The matrices \(\mathbf{A}\), \(\mathbf{B}\) and \(\mathbf{C}\) must have the same process grid.
Definition at line 710 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::mmult | ( | ScaLAPACKMatrix< NumberType > & | C, |
const ScaLAPACKMatrix< NumberType > & | B, | ||
const bool | adding = false |
||
) | const |
Matrix-matrix-multiplication.
The optional parameter adding
determines whether the result is stored in \(\mathbf{C}\) or added to \(\mathbf{C}\).
if (adding
) \(\mathbf{C} = \mathbf{C} + \mathbf{A} \cdot \mathbf{B}\)
else \(\mathbf{C} = \mathbf{A} \cdot \mathbf{B}\)
It is assumed that \(\mathbf{A}\) and \(\mathbf{B}\) have compatible sizes and that \(\mathbf{C}\) already has the right size.
The following alignment conditions have to be fulfilled: \(MB_A=MB_C\), \(NB_A=MB_B\) and \(NB_B=NB_C\).
Definition at line 827 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::Tmmult | ( | ScaLAPACKMatrix< NumberType > & | C, |
const ScaLAPACKMatrix< NumberType > & | B, | ||
const bool | adding = false |
||
) | const |
Matrix-matrix-multiplication using transpose of \(\mathbf{A}\).
The optional parameter adding
determines whether the result is stored in \(\mathbf{C}\) or added to \(\mathbf{C}\).
if (adding
) \(\mathbf{C} = \mathbf{C} + \mathbf{A}^T \cdot \mathbf{B}\)
else \(\mathbf{C} = \mathbf{A}^T \cdot \mathbf{B}\)
It is assumed that \(\mathbf{A}\) and \(\mathbf{B}\) have compatible sizes and that \(\mathbf{C}\) already has the right size.
The following alignment conditions have to be fulfilled: \(MB_A=MB_B\), \(NB_A=MB_C\) and \(NB_B=NB_C\).
Definition at line 841 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::mTmult | ( | ScaLAPACKMatrix< NumberType > & | C, |
const ScaLAPACKMatrix< NumberType > & | B, | ||
const bool | adding = false |
||
) | const |
Matrix-matrix-multiplication using the transpose of \(\mathbf{B}\).
The optional parameter adding
determines whether the result is stored in \(\mathbf{C}\) or added to \(\mathbf{C}\).
if (adding
) \(\mathbf{C} = \mathbf{C} + \mathbf{A} \cdot \mathbf{B}^T\)
else \(\mathbf{C} = \mathbf{A} \cdot \mathbf{B}^T\)
It is assumed that \(\mathbf{A}\) and \(\mathbf{B}\) have compatible sizes and that \(\mathbf{C}\) already has the right size.
The following alignment conditions have to be fulfilled: \(MB_A=MB_C\), \(NB_A=NB_B\) and \(MB_B=NB_C\).
Definition at line 855 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::TmTmult | ( | ScaLAPACKMatrix< NumberType > & | C, |
const ScaLAPACKMatrix< NumberType > & | B, | ||
const bool | adding = false |
||
) | const |
Matrix-matrix-multiplication using transpose of \(\mathbf{A}\) and \(\mathbf{B}\).
The optional parameter adding
determines whether the result is stored in \(\mathbf{C}\) or added to \(\mathbf{C}\).
if (adding
) \(\mathbf{C} = \mathbf{C} + \mathbf{A}^T \cdot \mathbf{B}^T\)
else \(\mathbf{C} = \mathbf{A}^T \cdot \mathbf{B}^T\)
It is assumed that \(\mathbf{A}\) and \(\mathbf{B}\) have compatible sizes and that \(\mathbf{C}\) already has the right size.
The following alignment conditions have to be fulfilled: \(MB_A=NB_B\), \(NB_A=MB_C\) and \(MB_B=NB_C\).
Definition at line 869 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::save | ( | const char * | filename, |
const std::pair< unsigned int, unsigned int > & | chunk_size = std::make_pair(numbers::invalid_unsigned_int, numbers::invalid_unsigned_int) |
||
) | const |
Stores the distributed matrix in filename
using HDF5.
In case that deal.II was built without HDF5 a call to this function will cause an exception to be thrown.
If HDF5 was built with MPI, parallel I/O is used to save the matrix. Otherwise, just one process will do the output. This means that internally the distributed matrix is copied to one process, which does the output. Therefore, the matrix has to fit into the memory of one process.
To tweak the I/O performance, especially for parallel I/O, the user may define the optional parameter chunk_size
. All MPI processes need to call the function with the same value. The matrix is written in chunks to the file, therefore the properties of the system define the optimal chunk size. Internally, HDF5 splits the matrix into chunk_size.first
x chunk_size.second
sized blocks, with chunk_size.first
being the number of rows of a chunk and chunk_size.second
the number of columns.
Definition at line 2256 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::load | ( | const char * | filename | ) |
Loads the distributed matrix from file filename
using HDF5. In case that deal.II was built without HDF5 a call to this function will cause an exception to be thrown.
The matrix must have the same dimensions as the matrix stored in the file.
If HDF5 was build with MPI, parallel I/O is used to load the matrix. Otherwise, just one process will load the matrix from storage and distribute the content to the other processes subsequently.
Definition at line 2673 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::compute_cholesky_factorization | ( | ) |
Compute the Cholesky factorization of the matrix using ScaLAPACK function pXpotrf
. The result of the factorization is stored in this object.
Definition at line 883 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::compute_lu_factorization | ( | ) |
Compute the LU factorization of the matrix using ScaLAPACK function pXgetrf
and partial pivoting with row interchanges. The result of the factorization is stored in this object.
Definition at line 916 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::invert | ( | ) |
Invert the matrix by first computing a Cholesky for symmetric matrices or a LU factorization for general matrices and then building the actual inverse using pXpotri
or pXgetri
. If the matrix is triangular, the LU factorization step is skipped, and pXtrtri
is used directly.
If a Cholesky or LU factorization has been applied previously, pXpotri
or pXgetri
are called directly.
The inverse is stored in this object.
Definition at line 957 of file scalapack.cc.
std::vector< NumberType > ScaLAPACKMatrix< NumberType >::eigenpairs_symmetric_by_index | ( | const std::pair< unsigned int, unsigned int > & | index_limits, |
const bool | compute_eigenvectors | ||
) |
Computing selected eigenvalues and, optionally, the eigenvectors of the real symmetric matrix \(\mathbf{A} \in \mathbb{R}^{M \times M}\).
The eigenvalues/eigenvectors are selected by prescribing a range of indices index_limits
.
If successful, the computed eigenvalues are arranged in ascending order. The eigenvectors are stored in the columns of the matrix, thereby overwriting the original content of the matrix.
If all eigenvalues/eigenvectors have to be computed, pass the closed interval \( \left[ 0, M-1 \right] \) in index_limits
.
Pass the closed interval \( \left[ M-r, M-1 \right] \) if the \(r\) largest eigenvalues/eigenvectors are desired.
Definition at line 1073 of file scalapack.cc.
std::vector< NumberType > ScaLAPACKMatrix< NumberType >::eigenpairs_symmetric_by_value | ( | const std::pair< NumberType, NumberType > & | value_limits, |
const bool | compute_eigenvectors | ||
) |
Computing selected eigenvalues and, optionally, the eigenvectors. The eigenvalues/eigenvectors are selected by prescribing a range of values value_limits
for the eigenvalues.
If successful, the computed eigenvalues are arranged in ascending order. The eigenvectors are stored in the columns of the matrix, thereby overwriting the original content of the matrix.
Definition at line 1098 of file scalapack.cc.
std::vector< NumberType > ScaLAPACKMatrix< NumberType >::eigenpairs_symmetric_by_index_MRRR | ( | const std::pair< unsigned int, unsigned int > & | index_limits, |
const bool | compute_eigenvectors | ||
) |
Computing selected eigenvalues and, optionally, the eigenvectors of the real symmetric matrix \(\mathbf{A} \in \mathbb{R}^{M \times M}\) using the MRRR algorithm.
The eigenvalues/eigenvectors are selected by prescribing a range of indices index_limits
.
If successful, the computed eigenvalues are arranged in ascending order. The eigenvectors are stored in the columns of the matrix, thereby overwriting the original content of the matrix.
If all eigenvalues/eigenvectors have to be computed, pass the closed interval \( \left[ 0, M-1 \right] \) in index_limits
.
Pass the closed interval \( \left[ M-r, M-1 \right] \) if the \(r\) largest eigenvalues/eigenvectors are desired.
Definition at line 1408 of file scalapack.cc.
std::vector< NumberType > ScaLAPACKMatrix< NumberType >::eigenpairs_symmetric_by_value_MRRR | ( | const std::pair< NumberType, NumberType > & | value_limits, |
const bool | compute_eigenvectors | ||
) |
Computing selected eigenvalues and, optionally, the eigenvectors of the real symmetric matrix \(\mathbf{A} \in \mathbb{R}^{M \times M}\) using the MRRR algorithm. The eigenvalues/eigenvectors are selected by prescribing a range of values value_limits
for the eigenvalues.
If successful, the computed eigenvalues are arranged in ascending order. The eigenvectors are stored in the columns of the matrix, thereby overwriting the original content of the matrix.
Definition at line 1431 of file scalapack.cc.
std::vector< NumberType > ScaLAPACKMatrix< NumberType >::compute_SVD | ( | ScaLAPACKMatrix< NumberType > * | U = nullptr , |
ScaLAPACKMatrix< NumberType > * | VT = nullptr |
||
) |
Computing the singular value decomposition (SVD) of a matrix \(\mathbf{A} \in \mathbb{R}^{M \times N}\), optionally computing the left and/or right singular vectors. The SVD is written as \(\mathbf{A} = \mathbf{U} \cdot \mathbf{\Sigma} \cdot \mathbf{V}^T\) with \(\mathbf{\Sigma} \in \mathbb{R}^{M \times N}\) as a diagonal matrix, \(\mathbf{U} \in \mathbb{R}^{M \times M}\) and \(\mathbf{V} \in \mathbb{R}^{M \times M}\) as orthogonal matrices. The diagonal elements of \(\mathbf{\Sigma}\) are the singular values of \(A\) and the columns of \(\mathbf{U}\) and \(\mathbf{V}\) are the corresponding left and right singular vectors, respectively. The singular values are returned in decreasing order and only the first \(\min(M,N)\) columns of \(\mathbf{U}\) and rows of \(\mathbf{V}^T\) are computed.
Upon return the content of the matrix is unusable. The matrix \(\mathbf{A}\) must have identical block cyclic distribution for the rows and column.
If left singular vectors are required matrices \(\mathbf{A}\) and \(\mathbf{U}\) have to be constructed with the same process grid and block cyclic distribution. If right singular vectors are required matrices \(\mathbf{A}\) and \(\mathbf{V}^T\) have to be constructed with the same process grid and block cyclic distribution.
To avoid computing the left and/or right singular vectors the function accepts nullptr
for U
and/or VT
.
Definition at line 1659 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::least_squares | ( | ScaLAPACKMatrix< NumberType > & | B, |
const bool | transpose = false |
||
) |
Solving overdetermined or underdetermined real linear systems involving matrix \(\mathbf{A} \in \mathbb{R}^{M \times N}\), or its transpose \(\mathbf{A}^T\), using a QR or LQ factorization of \(\mathbf{A}\) for \(N_{\rm RHS}\) RHS vectors in the columns of matrix \(\mathbf{B}\)
It is assumed that \(\mathbf{A}\) has full rank: \(\rm{rank}(\mathbf{A}) = \min(M,N)\).
The following options are supported:
If(!tranpose) then \(\mathbf{B} \in \mathbb{R}^{M \times N_{\rm RHS}}\), otherwise \(\mathbf{B} \in \mathbb{R}^{N \times N_{\rm RHS}}\). The matrices \(\mathbf{A}\) and \(\mathbf{B}\) must have an identical block cyclic distribution for rows and columns.
Definition at line 1779 of file scalapack.cc.
unsigned int ScaLAPACKMatrix< NumberType >::pseudoinverse | ( | const NumberType | ratio | ) |
Compute the pseudoinverse \(\mathbf{A}^+ \in \mathbb{R}^{N \times M}\) (Moore-Penrose inverse) of a real matrix \(\mathbf{A} \in \mathbb{R}^{M \times N}\) using the singular value decomposition \(\mathbf{A} = \mathbf{U} \cdot \mathbf{\Sigma} \cdot \mathbf{V}^T\).
Unlike the inverse, the pseudoinverse \(\mathbf{A}^+ = \mathbf{V} \cdot \mathbf{\Sigma}^+ \cdot \mathbf{U}^T\) exists for both rectangular as well as singular matrices \(\mathbf{A}\).
For a rectangular \(\mathbf{\Sigma}\) the pseudoinverse is computed by taking the reciprocal of each non-zero element on the diagonal, leaving the zeros in place, and then transposing \(\mathbf{\Sigma}\). For the numerical computation only the singular values \(\sigma_i > \sigma_{\text{max}} \, \text{ratio}\) are taken into account. Upon successful exit, the function returns the number of singular values fulfilling that condition. That value can be interpreted as the rank of \(\mathbf{A}\).
Upon return this object contains the pseudoinverse \(\mathbf{A}^+ \in \mathbb{R}^{N \times M}\).
The following alignment conditions have to be fulfilled: \(MB_A = NB_A\).
Definition at line 1871 of file scalapack.cc.
NumberType ScaLAPACKMatrix< NumberType >::reciprocal_condition_number | ( | const NumberType | a_norm | ) | const |
Estimate the condition number of a SPD matrix in the \(l_1\)-norm. The matrix has to be in the Cholesky state (see compute_cholesky_factorization()). The reciprocal of the condition number is returned in order to avoid the possibility of overflow when the condition number is very large.
a_norm
must contain the \(l_1\)-norm of the matrix prior to calling Cholesky factorization (see l1_norm()).
Definition at line 1958 of file scalapack.cc.
NumberType ScaLAPACKMatrix< NumberType >::l1_norm | ( | ) | const |
Compute the \(l_1\)-norm of the matrix.
Definition at line 2020 of file scalapack.cc.
NumberType ScaLAPACKMatrix< NumberType >::linfty_norm | ( | ) | const |
Compute the \(l_{\infty}\) norm of the matrix.
Definition at line 2034 of file scalapack.cc.
NumberType ScaLAPACKMatrix< NumberType >::frobenius_norm | ( | ) | const |
Compute the Frobenius norm of the matrix.
Definition at line 2048 of file scalapack.cc.
size_type ScaLAPACKMatrix< NumberType >::m | ( | ) | const |
Number of rows of the \(M \times N\) matrix.
size_type ScaLAPACKMatrix< NumberType >::n | ( | ) | const |
Number of columns of the \(M \times N\) matrix.
unsigned int ScaLAPACKMatrix< NumberType >::local_m | ( | ) | const |
Number of local rows on this MPI processes.
unsigned int ScaLAPACKMatrix< NumberType >::local_n | ( | ) | const |
Number of local columns on this MPI process.
unsigned int ScaLAPACKMatrix< NumberType >::global_row | ( | const unsigned int | loc_row | ) | const |
Return the global row number for the given local row loc_row
.
Definition at line 268 of file scalapack.cc.
unsigned int ScaLAPACKMatrix< NumberType >::global_column | ( | const unsigned int | loc_column | ) | const |
Return the global column number for the given local column loc_column
.
Definition at line 285 of file scalapack.cc.
NumberType ScaLAPACKMatrix< NumberType >::local_el | ( | const unsigned int | loc_row, |
const unsigned int | loc_column | ||
) | const |
Read access to local element.
NumberType& ScaLAPACKMatrix< NumberType >::local_el | ( | const unsigned int | loc_row, |
const unsigned int | loc_column | ||
) |
Write access to local element.
void ScaLAPACKMatrix< NumberType >::scale_columns | ( | const InputVector & | factors | ) |
Scale the columns of the distributed matrix by the scalars provided in the array factors
.
The array factors
must have as many entries as the matrix columns.
Copies of factors
have to be available on all processes of the underlying MPI communicator.
InputVector
is that it must be possible to create an ArrayView from it; this is satisfied by the std::vector
and Vector classes. Definition at line 3121 of file scalapack.cc.
void ScaLAPACKMatrix< NumberType >::scale_rows | ( | const InputVector & | factors | ) |
Scale the rows of the distributed matrix by the scalars provided in the array factors
.
The array factors
must have as many entries as the matrix rows.
Copies of factors
have to be available on all processes of the underlying MPI communicator.
InputVector
is that it must be possible to create an ArrayView from it; this is satisfied by the std::vector
and Vector classes. Definition at line 3132 of file scalapack.cc.
|
private |
Calculate the norm of a distributed symmetric dense matrix using ScaLAPACK's internal function.
Definition at line 2121 of file scalapack.cc.
|
private |
Calculate the norm of a distributed dense matrix using ScaLAPACK's internal function.
Definition at line 2062 of file scalapack.cc.
|
private |
Computing selected eigenvalues and, optionally, the eigenvectors. The eigenvalues/eigenvectors are selected by either prescribing a range of indices index_limits
or a range of values value_limits
for the eigenvalues. The function will throw an exception if both ranges are prescribed (meaning that both ranges differ from the default value) as this ambiguity is prohibited. If successful, the computed eigenvalues are arranged in ascending order. The eigenvectors are stored in the columns of the matrix, thereby overwriting the original content of the matrix.
Definition at line 1118 of file scalapack.cc.
|
private |
Computing selected eigenvalues and, optionally, the eigenvectors of the real symmetric matrix \(\mathbf{A} \in \mathbb{R}^{M \times M}\) using the MRRR algorithm. The eigenvalues/eigenvectors are selected by either prescribing a range of indices index_limits
or a range of values value_limits
for the eigenvalues. The function will throw an exception if both ranges are prescribed (meaning that both ranges differ from the default value) as this ambiguity is prohibited.
By calling this function the original content of the matrix will be overwritten. If requested, the eigenvectors are stored in the columns of the matrix. Also in the case that just the eigenvalues are required, the content of the matrix will be overwritten.
If successful, the computed eigenvalues are arranged in ascending order.
index_limits
has to be set accordingly. Using Intel-MKL this restriction is not required. Definition at line 1449 of file scalapack.cc.
|
private |
Since ScaLAPACK operations notoriously change the meaning of the matrix entries, we record the current state after the last operation here.
Definition at line 859 of file scalapack.h.
|
private |
Additional property of the matrix which may help to select more efficient ScaLAPACK functions.
Definition at line 865 of file scalapack.h.
|
private |
A shared pointer to a Utilities::MPI::ProcessGrid object which contains a BLACS context and a MPI communicator, as well as other necessary data structures.
Definition at line 872 of file scalapack.h.
|
private |
Number of rows in the matrix.
Definition at line 877 of file scalapack.h.
|
private |
Number of columns in the matrix.
Definition at line 882 of file scalapack.h.
|
private |
Row block size.
Definition at line 887 of file scalapack.h.
|
private |
Column block size.
Definition at line 892 of file scalapack.h.
|
private |
Number of rows in the matrix owned by the current process.
Definition at line 897 of file scalapack.h.
|
private |
Number of columns in the matrix owned by the current process.
Definition at line 902 of file scalapack.h.
|
private |
ScaLAPACK description vector.
Definition at line 907 of file scalapack.h.
|
mutableprivate |
Workspace array.
Definition at line 912 of file scalapack.h.
|
mutableprivate |
Integer workspace array.
Definition at line 917 of file scalapack.h.
|
private |
Integer array holding pivoting information required by ScaLAPACK's matrix factorization routines.
Definition at line 923 of file scalapack.h.
|
private |
A character to define where elements are stored in case ScaLAPACK operations support this.
Definition at line 929 of file scalapack.h.
|
private |
The process row of the process grid over which the first row of the global matrix is distributed.
Definition at line 935 of file scalapack.h.
|
private |
The process column of the process grid over which the first column of the global matrix is distributed.
Definition at line 941 of file scalapack.h.
|
private |
Global row index that determines where to start a submatrix. Currently this equals unity, as we don't use submatrices.
Definition at line 947 of file scalapack.h.
|
private |
Global column index that determines where to start a submatrix. Currently this equals unity, as we don't use submatrices.
Definition at line 953 of file scalapack.h.
|
mutableprivate |
Thread mutex.
Definition at line 958 of file scalapack.h.