Reference documentation for deal.II version 9.1.0-pre
scalapack.cc
1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2017 - 2018 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 // ---------------------------------------------------------------------
15 
16 
17 #include <deal.II/lac/scalapack.h>
18 
19 #ifdef DEAL_II_WITH_SCALAPACK
20 
21 # include <deal.II/base/array_view.h>
22 # include <deal.II/base/mpi.h>
23 # include <deal.II/base/mpi.templates.h>
24 # include <deal.II/base/std_cxx14/memory.h>
25 
26 # include <deal.II/lac/scalapack.templates.h>
27 
28 # ifdef DEAL_II_WITH_HDF5
29 # include <hdf5.h>
30 # endif
31 
32 DEAL_II_NAMESPACE_OPEN
33 
34 # ifdef DEAL_II_WITH_HDF5
35 
36 template <typename number>
37 inline hid_t
38 hdf5_type_id(const number *)
39 {
40 Assert(false, dealii::ExcNotImplemented());
41  // don't know what to put here; it does not matter
42  return -1;
43 }
44 
45 inline hid_t
46 hdf5_type_id(const double *)
47 {
48  return H5T_NATIVE_DOUBLE;
49 }
50 
51 inline hid_t
52 hdf5_type_id(const float *)
53 {
54  return H5T_NATIVE_FLOAT;
55 }
56 
57 inline hid_t
58 hdf5_type_id(const int *)
59 {
60  return H5T_NATIVE_INT;
61 }
62 
63 inline hid_t
64 hdf5_type_id(const unsigned int *)
65 {
66  return H5T_NATIVE_UINT;
67 }
68 
69 inline hid_t
70 hdf5_type_id(const char *)
71 {
72  return H5T_NATIVE_CHAR;
73 }
74 # endif // DEAL_II_WITH_HDF5
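// hdf5_type_id() maps a C++ number type to the corresponding native HDF5 type
// so the HDF5 C API can be called generically. A minimal usage sketch follows;
// dataset_id, the dataspace handles and buffer are hypothetical placeholders,
// not objects defined in this file:
//
//   double *buffer = /* local data */ nullptr;
//   herr_t status = H5Dwrite(dataset_id,
//                            hdf5_type_id(buffer), // resolves to H5T_NATIVE_DOUBLE
//                            H5S_ALL, H5S_ALL, H5P_DEFAULT,
//                            buffer);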
75 
76 
77 
78 template <typename NumberType>
79 ScaLAPACKMatrix<NumberType>::ScaLAPACKMatrix(
80  const size_type n_rows_,
81  const size_type n_columns_,
82  const std::shared_ptr<const Utilities::MPI::ProcessGrid> &process_grid,
83  const size_type row_block_size_,
84  const size_type column_block_size_,
85  const LAPACKSupport::Property property_)
86  : uplo('L')
87  , // for non-symmetric matrices this is not needed
88  first_process_row(0)
89  , first_process_column(0)
90  , submatrix_row(1)
91  , submatrix_column(1)
92 {
93  reinit(n_rows_,
94  n_columns_,
95  process_grid,
96  row_block_size_,
97  column_block_size_,
98  property_);
99 }
100 
101 
102 
103 template <typename NumberType>
104 ScaLAPACKMatrix<NumberType>::ScaLAPACKMatrix(
105  const size_type size,
106  const std::shared_ptr<const Utilities::MPI::ProcessGrid> process_grid,
107  const size_type block_size,
108  const LAPACKSupport::Property property)
109  : ScaLAPACKMatrix<NumberType>(size,
110  size,
111  process_grid,
112  block_size,
113  block_size,
114  property)
115 {}
116 
117 
118 
119 template <typename NumberType>
120 void
121 ScaLAPACKMatrix<NumberType>::reinit(
122  const size_type n_rows_,
123  const size_type n_columns_,
124  const std::shared_ptr<const Utilities::MPI::ProcessGrid> &process_grid,
125  const size_type row_block_size_,
126  const size_type column_block_size_,
127  const LAPACKSupport::Property property_)
128 {
129  Assert(row_block_size_ > 0, ExcMessage("Row block size has to be positive."));
130  Assert(column_block_size_ > 0,
131  ExcMessage("Column block size has to be positive."));
132  Assert(
133  row_block_size_ <= n_rows_,
134  ExcMessage(
135  "Row block size can not be greater than the number of rows of the matrix"));
136  Assert(
137  column_block_size_ <= n_columns_,
138  ExcMessage(
139  "Column block size can not be greater than the number of columns of the matrix"));
140 
141  state = LAPACKSupport::State::matrix;
142  property = property_;
143  grid = process_grid;
144  n_rows = n_rows_;
145  n_columns = n_columns_;
146  row_block_size = row_block_size_;
147  column_block_size = column_block_size_;
148 
149  if (grid->mpi_process_is_active)
150  {
151  // Get local sizes:
152  n_local_rows = numroc_(&n_rows,
153  &row_block_size,
154  &(grid->this_process_row),
155  &first_process_row,
156  &(grid->n_process_rows));
157  n_local_columns = numroc_(&n_columns,
158  &column_block_size,
159  &(grid->this_process_column),
160  &first_process_column,
161  &(grid->n_process_columns));
162 
163  // LLD_A = MAX(1,NUMROC(M_A, MB_A, MYROW, RSRC_A, NPROW)), different
164  // between processes
165  int lda = std::max(1, n_local_rows);
166 
167  int info = 0;
168  descinit_(descriptor,
169  &n_rows,
170  &n_columns,
171  &row_block_size,
172  &column_block_size,
173  &first_process_row,
174  &first_process_column,
175  &(grid->blacs_context),
176  &lda,
177  &info);
178  AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("descinit_", info));
179 
180  this->TransposeTable<NumberType>::reinit(n_local_rows, n_local_columns);
181  }
182  else
183  {
184  // set process-local variables to something telling:
185  n_local_rows = -1;
186  n_local_columns = -1;
187  for (unsigned int i = 0; i < 9; ++i)
188  descriptor[i] = -1;
189  }
190 }
191 
192 
193 
194 template <typename NumberType>
195 void
196 ScaLAPACKMatrix<NumberType>::reinit(
197  const size_type size,
198  const std::shared_ptr<const Utilities::MPI::ProcessGrid> process_grid,
199  const size_type block_size,
200  const LAPACKSupport::Property property)
201 {
202  reinit(size, size, process_grid, block_size, block_size, property);
203 }
204 
205 
206 
207 template <typename NumberType>
208 void
209 ScaLAPACKMatrix<NumberType>::set_property(
210  const LAPACKSupport::Property property_)
211 {
212  property = property_;
213 }
214 
215 
216 
217 template <typename NumberType>
218 LAPACKSupport::Property
219 ScaLAPACKMatrix<NumberType>::get_property() const
220 {
221  return property;
222 }
223 
224 
225 
226 template <typename NumberType>
227 LAPACKSupport::State
228 ScaLAPACKMatrix<NumberType>::get_state() const
229 {
230  return state;
231 }
232 
233 
234 
235 template <typename NumberType>
236 ScaLAPACKMatrix<NumberType> &
237 ScaLAPACKMatrix<NumberType>::operator=(const FullMatrix<NumberType> &matrix)
238 {
239  // FIXME: another way to copy is to use pdgeadd_ PBLAS routine.
240  // This routine computes the sum of two matrices B:=a*A+b*B.
241  // Matrices can have different distribution,in particular matrix A can
242  // be owned by only one process, so we can set a=1 and b=0 to copy
243  // non-distributed matrix A into distributed matrix B.
244  Assert(n_rows == int(matrix.m()), ExcDimensionMismatch(n_rows, matrix.m()));
245  Assert(n_columns == int(matrix.n()),
246  ExcDimensionMismatch(n_columns, matrix.n()));
247 
248  if (grid->mpi_process_is_active)
249  {
250  for (int i = 0; i < n_local_rows; ++i)
251  {
252  const int glob_i = global_row(i);
253  for (int j = 0; j < n_local_columns; ++j)
254  {
255  const int glob_j = global_column(j);
256  local_el(i, j) = matrix(glob_i, glob_j);
257  }
258  }
259  }
260  state = LAPACKSupport::matrix;
261  return *this;
262 }
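// A usage sketch for the assignment above (sizes and block sizes are
// illustrative only): every active process copies the entries of its local
// block-cyclic portion out of the replicated serial matrix.
//
//   FullMatrix<double>      serial(100, 100);           // same content on all ranks
//   ScaLAPACKMatrix<double> dist(100, 100, grid, 32, 32);
//   dist = serial;                                       // now block-cyclically distributed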
263 
264 
265 
266 template <typename NumberType>
267 unsigned int
268 ScaLAPACKMatrix<NumberType>::global_row(const unsigned int loc_row) const
269 {
270  Assert(n_local_rows >= 0 && loc_row < static_cast<unsigned int>(n_local_rows),
271  ExcIndexRange(loc_row, 0, n_local_rows));
272  const int i = loc_row + 1;
273  return indxl2g_(&i,
274  &row_block_size,
275  &(grid->this_process_row),
276  &first_process_row,
277  &(grid->n_process_rows)) -
278  1;
279 }
280 
281 
282 
283 template <typename NumberType>
284 unsigned int
285 ScaLAPACKMatrix<NumberType>::global_column(const unsigned int loc_column) const
286 {
287  Assert(n_local_columns >= 0 &&
288  loc_column < static_cast<unsigned int>(n_local_columns),
289  ExcIndexRange(loc_column, 0, n_local_columns));
290  const int j = loc_column + 1;
291  return indxl2g_(&j,
292  &column_block_size,
293  &(grid->this_process_column),
294  &first_process_column,
295  &(grid->n_process_columns)) -
296  1;
297 }
298 
299 
300 
301 template <typename NumberType>
302 void
303 ScaLAPACKMatrix<NumberType>::copy_to(FullMatrix<NumberType> &matrix) const
304 {
305  // FIXME: use PDGEMR2D for copying?
306  // PDGEMR2D copies a submatrix of A on a submatrix of B.
307  // A and B can have different distributions
308  // see http://icl.cs.utk.edu/lapack-forum/viewtopic.php?t=50
309  Assert(n_rows == int(matrix.m()), ExcDimensionMismatch(n_rows, matrix.m()));
310  Assert(n_columns == int(matrix.n()),
311  ExcDimensionMismatch(n_columns, matrix.n()));
312 
313  matrix = 0.;
314  if (grid->mpi_process_is_active)
315  {
316  for (int i = 0; i < n_local_rows; ++i)
317  {
318  const int glob_i = global_row(i);
319  for (int j = 0; j < n_local_columns; ++j)
320  {
321  const int glob_j = global_column(j);
322  matrix(glob_i, glob_j) = local_el(i, j);
323  }
324  }
325  }
326  Utilities::MPI::sum(matrix, grid->mpi_communicator, matrix);
327 
328  // we could move the following lines under the main loop above,
329  // but they would be dependent on glob_i and glob_j, which
330  // won't make it much prettier
331  if (state == LAPACKSupport::cholesky)
332  {
333  if (uplo == 'L')
334  for (unsigned int i = 0; i < matrix.n(); ++i)
335  for (unsigned int j = i + 1; j < matrix.m(); ++j)
336  matrix(i, j) = 0.;
337  else if (uplo == 'U')
338  for (unsigned int i = 0; i < matrix.n(); ++i)
339  for (unsigned int j = 0; j < i; ++j)
340  matrix(i, j) = 0.;
341  }
342  else if (property == LAPACKSupport::symmetric &&
343  state == LAPACKSupport::State::inverse_matrix)
344  {
345  if (uplo == 'L')
346  for (unsigned int i = 0; i < matrix.n(); ++i)
347  for (unsigned int j = i + 1; j < matrix.m(); ++j)
348  matrix(i, j) = matrix(j, i);
349  else if (uplo == 'U')
350  for (unsigned int i = 0; i < matrix.n(); ++i)
351  for (unsigned int j = 0; j < i; ++j)
352  matrix(i, j) = matrix(j, i);
353  }
354 }
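// The gather direction above sums the local entries over the MPI communicator,
// so the destination FullMatrix must already have the global size on every
// process; a short sketch (sizes illustrative):
//
//   FullMatrix<double> serial(100, 100);  // must match the global size
//   dist.copy_to(serial);                 // identical copy on every process afterwards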
355 
356 
357 
358 template <typename NumberType>
359 void
360 ScaLAPACKMatrix<NumberType>::copy_to(
361  ScaLAPACKMatrix<NumberType> &B,
362  const std::pair<unsigned int, unsigned int> &offset_A,
363  const std::pair<unsigned int, unsigned int> &offset_B,
364  const std::pair<unsigned int, unsigned int> &submatrix_size) const
365 {
366  // submatrix is empty
367  if (submatrix_size.first == 0 || submatrix_size.second == 0)
368  return;
369 
370  // range checking for matrix A
371  Assert(offset_A.first < (unsigned int)(n_rows - submatrix_size.first + 1),
372  ExcIndexRange(offset_A.first, 0, n_rows - submatrix_size.first + 1));
373  Assert(
374  offset_A.second < (unsigned int)(n_columns - submatrix_size.second + 1),
375  ExcIndexRange(offset_A.second, 0, n_columns - submatrix_size.second + 1));
376 
377  // range checking for matrix B
378  Assert(offset_B.first < (unsigned int)(B.n_rows - submatrix_size.first + 1),
379  ExcIndexRange(offset_B.first, 0, B.n_rows - submatrix_size.first + 1));
380  Assert(
381  offset_B.second < (unsigned int)(B.n_columns - submatrix_size.second + 1),
382  ExcIndexRange(offset_B.second, 0, B.n_columns - submatrix_size.second + 1));
383 
384  // Currently, copying of matrices will only be supported if A and B share the
385  // same MPI communicator
386  int ierr, comparison;
387  ierr = MPI_Comm_compare(grid->mpi_communicator,
388  B.grid->mpi_communicator,
389  &comparison);
390  AssertThrowMPI(ierr);
391  Assert(comparison == MPI_IDENT,
392  ExcMessage("Matrix A and B must have a common MPI Communicator"));
393 
394  /*
395  * The routine pgemr2d requires a BLACS context resembling at least the union
396  * of process grids described by the BLACS contexts held by the ProcessGrids
397  * of matrix A and B. As A and B share the same MPI communicator, there is no
398  * need to create a union MPI communicator to initialise the BLACS context
399  */
400  int union_blacs_context = Csys2blacs_handle(this->grid->mpi_communicator);
401  const char *order = "Col";
402  int union_n_process_rows =
403  Utilities::MPI::n_mpi_processes(this->grid->mpi_communicator);
404  int union_n_process_columns = 1;
405  Cblacs_gridinit(&union_blacs_context,
406  order,
407  union_n_process_rows,
408  union_n_process_columns);
409 
410  int n_grid_rows_A, n_grid_columns_A, my_row_A, my_column_A;
411  Cblacs_gridinfo(this->grid->blacs_context,
412  &n_grid_rows_A,
413  &n_grid_columns_A,
414  &my_row_A,
415  &my_column_A);
416 
417  // check whether process is in the BLACS context of matrix A
418  const bool in_context_A =
419  (my_row_A >= 0 && my_row_A < n_grid_rows_A) &&
420  (my_column_A >= 0 && my_column_A < n_grid_columns_A);
421 
422 
423  int n_grid_rows_B, n_grid_columns_B, my_row_B, my_column_B;
424  Cblacs_gridinfo(B.grid->blacs_context,
425  &n_grid_rows_B,
426  &n_grid_columns_B,
427  &my_row_B,
428  &my_column_B);
429 
430  // check whether process is in the BLACS context of matrix B
431  const bool in_context_B =
432  (my_row_B >= 0 && my_row_B < n_grid_rows_B) &&
433  (my_column_B >= 0 && my_column_B < n_grid_columns_B);
434 
435  const int n_rows_submatrix = submatrix_size.first;
436  const int n_columns_submatrix = submatrix_size.second;
437 
438  // due to Fortran indexing one has to be added
439  int ia = offset_A.first + 1, ja = offset_A.second + 1;
440  int ib = offset_B.first + 1, jb = offset_B.second + 1;
441 
442  std::array<int, 9> desc_A, desc_B;
443 
444  const NumberType *loc_vals_A = nullptr;
445  NumberType * loc_vals_B = nullptr;
446 
447  // Note: the function pgemr2d has to be called by all processes in the union
448  // BLACS context. If the calling process is not part of the BLACS context of A,
449  // desc_A[1] has to be -1 and all other parameters do not have to be set. If
450  // the calling process is not part of the BLACS context of B, desc_B[1] has to
451  // be -1 and all other parameters do not have to be set.
452  if (in_context_A)
453  {
454  if (this->values.size() != 0)
455  loc_vals_A = &this->values[0];
456 
457  for (unsigned int i = 0; i < desc_A.size(); ++i)
458  desc_A[i] = this->descriptor[i];
459  }
460  else
461  desc_A[1] = -1;
462 
463  if (in_context_B)
464  {
465  if (B.values.size() != 0)
466  loc_vals_B = &B.values[0];
467 
468  for (unsigned int i = 0; i < desc_B.size(); ++i)
469  desc_B[i] = B.descriptor[i];
470  }
471  else
472  desc_B[1] = -1;
473 
474  pgemr2d(&n_rows_submatrix,
475  &n_columns_submatrix,
476  loc_vals_A,
477  &ia,
478  &ja,
479  desc_A.data(),
480  loc_vals_B,
481  &ib,
482  &jb,
483  desc_B.data(),
484  &union_blacs_context);
485 
486  B.state = LAPACKSupport::matrix;
487 
488  // releasing the union BLACS context
489  Cblacs_gridexit(union_blacs_context);
490 }
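// Sketch of the submatrix copy above (all numbers are illustrative): copy a
// 20x30 block starting at global index (5,10) of A into B starting at (0,0).
//
//   A.copy_to(B,
//             std::make_pair(5u, 10u),    // offset_A
//             std::make_pair(0u, 0u),     // offset_B
//             std::make_pair(20u, 30u));  // submatrix_size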
491 
492 
493 
494 template <typename NumberType>
495 void
496 ScaLAPACKMatrix<NumberType>::copy_to(ScaLAPACKMatrix<NumberType> &dest) const
497 {
498  Assert(n_rows == dest.n_rows, ExcDimensionMismatch(n_rows, dest.n_rows));
499  Assert(n_columns == dest.n_columns,
500  ExcDimensionMismatch(n_columns, dest.n_columns));
501 
502  if (this->grid->mpi_process_is_active)
503  AssertThrow(
504  this->descriptor[0] == 1,
505  ExcMessage(
506  "Copying of ScaLAPACK matrices only implemented for dense matrices"));
507  if (dest.grid->mpi_process_is_active)
508  AssertThrow(
509  dest.descriptor[0] == 1,
510  ExcMessage(
511  "Copying of ScaLAPACK matrices only implemented for dense matrices"));
512 
513  /*
514  * just in case of different process grids or block-cyclic distributions
515  * inter-process communication is necessary
516  * if distributed matrices have the same process grid and block sizes, local
517  * copying is enough
518  */
519  if ((this->grid != dest.grid) || (row_block_size != dest.row_block_size) ||
520  (column_block_size != dest.column_block_size))
521  {
522  /*
523  * get the MPI communicator, which is the union of the source and
524  * destination MPI communicator
525  */
526  int ierr = 0;
527  MPI_Group group_source, group_dest, group_union;
528  ierr = MPI_Comm_group(this->grid->mpi_communicator, &group_source);
529  AssertThrowMPI(ierr);
530  ierr = MPI_Comm_group(dest.grid->mpi_communicator, &group_dest);
531  AssertThrowMPI(ierr);
532  ierr = MPI_Group_union(group_source, group_dest, &group_union);
533  AssertThrowMPI(ierr);
534  MPI_Comm mpi_communicator_union;
535 
536  // to create a communicator representing the union of the source
537  // and destination MPI communicator we need a communicator that
538  // is guaranteed to contain all desired processes -- i.e.,
539  // MPI_COMM_WORLD. on the other hand, as documented in the MPI
540  // standard, MPI_Comm_create_group is not collective on all
541  // processes in the first argument, but instead is collective on
542  // only those processes listed in the group. in other words,
543  // there is really no harm in passing MPI_COMM_WORLD as the
544  // first argument, even if the program we are currently running
545  // and that is calling this function only works on a subset of
546  // processes. the same holds for the wrapper/fallback we are using here.
547  ierr = Utilities::MPI::create_group(MPI_COMM_WORLD,
548  group_union,
549  5,
550  &mpi_communicator_union);
551  AssertThrowMPI(ierr);
552 
553  /*
554  * The routine pgemr2d requires a BLACS context resembling at least the
555  * union of process grids described by the BLACS contexts of matrix A and
556  * B
557  */
558  int union_blacs_context = Csys2blacs_handle(mpi_communicator_union);
559  const char *order = "Col";
560  int union_n_process_rows =
561  Utilities::MPI::n_mpi_processes(mpi_communicator_union);
562  int union_n_process_columns = 1;
563  Cblacs_gridinit(&union_blacs_context,
564  order,
565  union_n_process_rows,
566  union_n_process_columns);
567 
568  const NumberType *loc_vals_source = nullptr;
569  NumberType * loc_vals_dest = nullptr;
570 
571  if (this->grid->mpi_process_is_active && (this->values.size() > 0))
572  {
573  AssertThrow(this->values.size() > 0,
574  dealii::ExcMessage(
575  "source: process is active but local matrix empty"));
576  loc_vals_source = &this->values[0];
577  }
578  if (dest.grid->mpi_process_is_active && (dest.values.size() > 0))
579  {
580  AssertThrow(
581  dest.values.size() > 0,
582  dealii::ExcMessage(
583  "destination: process is active but local matrix empty"));
584  loc_vals_dest = &dest.values[0];
585  }
586  pgemr2d(&n_rows,
587  &n_columns,
588  loc_vals_source,
589  &submatrix_row,
590  &submatrix_column,
591  descriptor,
592  loc_vals_dest,
593  &dest.submatrix_row,
594  &dest.submatrix_column,
595  dest.descriptor,
596  &union_blacs_context);
597 
598  Cblacs_gridexit(union_blacs_context);
599 
600  if (mpi_communicator_union != MPI_COMM_NULL)
601  {
602  ierr = MPI_Comm_free(&mpi_communicator_union);
603  AssertThrowMPI(ierr);
604  }
605  ierr = MPI_Group_free(&group_source);
606  AssertThrowMPI(ierr);
607  ierr = MPI_Group_free(&group_dest);
608  AssertThrowMPI(ierr);
609  ierr = MPI_Group_free(&group_union);
610  AssertThrowMPI(ierr);
611  }
612  else
613  // process is active in the process grid
614  if (this->grid->mpi_process_is_active)
615  dest.values = this->values;
616 
617  dest.state = state;
618  dest.property = property;
619 }
620 
621 
622 
623 template <typename NumberType>
624 void
625 ScaLAPACKMatrix<NumberType>::copy_transposed(
626  const ScaLAPACKMatrix<NumberType> &B)
627 {
628  add(B, 0, 1, true);
629 }
630 
631 
632 
633 template <typename NumberType>
634 void
635 ScaLAPACKMatrix<NumberType>::add(const ScaLAPACKMatrix<NumberType> &B,
636  const NumberType alpha,
637  const NumberType beta,
638  const bool transpose_B)
639 {
640  if (transpose_B)
641  {
642  Assert(n_rows == B.n_columns, ExcDimensionMismatch(n_rows, B.n_columns));
643  Assert(n_columns == B.n_rows, ExcDimensionMismatch(n_columns, B.n_rows));
644  Assert(column_block_size == B.row_block_size,
645  ExcDimensionMismatch(column_block_size, B.row_block_size));
646  Assert(row_block_size == B.column_block_size,
647  ExcDimensionMismatch(row_block_size, B.column_block_size));
648  }
649  else
650  {
651  Assert(n_rows == B.n_rows, ExcDimensionMismatch(n_rows, B.n_rows));
652  Assert(n_columns == B.n_columns,
653  ExcDimensionMismatch(n_columns, B.n_columns));
654  Assert(column_block_size == B.column_block_size,
655  ExcDimensionMismatch(column_block_size, B.column_block_size));
656  Assert(row_block_size == B.row_block_size,
657  ExcDimensionMismatch(row_block_size, B.row_block_size));
658  }
659  Assert(this->grid == B.grid,
660  ExcMessage("The matrices A and B need to have the same process grid"));
661 
662  if (this->grid->mpi_process_is_active)
663  {
664  char trans_b = transpose_B ? 'T' : 'N';
665  NumberType *A_loc =
666  (this->values.size() > 0) ? &this->values[0] : nullptr;
667  const NumberType *B_loc = (B.values.size() > 0) ? &B.values[0] : nullptr;
668 
669  pgeadd(&trans_b,
670  &n_rows,
671  &n_columns,
672  &beta,
673  B_loc,
674  &B.submatrix_row,
675  &B.submatrix_column,
676  B.descriptor,
677  &alpha,
678  A_loc,
679  &submatrix_row,
680  &submatrix_column,
681  descriptor);
682  }
683  state = LAPACKSupport::matrix;
684 }
685 
686 
687 
688 template <typename NumberType>
689 void
690 ScaLAPACKMatrix<NumberType>::add(const NumberType a,
691  const ScaLAPACKMatrix<NumberType> &B)
692 {
693  add(B, 1, a, false);
694 }
695 
696 
697 
698 template <typename NumberType>
699 void
700 ScaLAPACKMatrix<NumberType>::Tadd(const NumberType a,
701  const ScaLAPACKMatrix<NumberType> &B)
702 {
703  add(B, 1, a, true);
704 }
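// The two wrappers above forward to add(B, 1, a, transpose); in matrix terms
// (sketch): A.add(a, B) performs A += a*B, while A.Tadd(a, B) performs
// A += a*B^T, both requiring matching process grids and block sizes.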
705 
706 
707 
708 template <typename NumberType>
709 void
710 ScaLAPACKMatrix<NumberType>::mult(const NumberType b,
711  const ScaLAPACKMatrix<NumberType> &B,
712  const NumberType c,
713  ScaLAPACKMatrix<NumberType> &C,
714  const bool transpose_A,
715  const bool transpose_B) const
716 {
717  Assert(this->grid == B.grid,
718  ExcMessage("The matrices A and B need to have the same process grid"));
719  Assert(C.grid == B.grid,
720  ExcMessage("The matrices B and C need to have the same process grid"));
721 
722  // see for further info:
723  // https://www.ibm.com/support/knowledgecenter/SSNR5K_4.2.0/com.ibm.cluster.pessl.v4r2.pssl100.doc/am6gr_lgemm.htm
724  if (!transpose_A && !transpose_B)
725  {
726  Assert(this->n_columns == B.n_rows,
727  ExcDimensionMismatch(this->n_columns, B.n_rows));
728  Assert(this->n_rows == C.n_rows,
729  ExcDimensionMismatch(this->n_rows, C.n_rows));
730  Assert(B.n_columns == C.n_columns,
731  ExcDimensionMismatch(B.n_columns, C.n_columns));
732  Assert(this->row_block_size == C.row_block_size,
733  ExcDimensionMismatch(this->row_block_size, C.row_block_size));
734  Assert(this->column_block_size == B.row_block_size,
735  ExcDimensionMismatch(this->column_block_size, B.row_block_size));
736  Assert(B.column_block_size == C.column_block_size,
737  ExcDimensionMismatch(B.column_block_size, C.column_block_size));
738  }
739  else if (transpose_A && !transpose_B)
740  {
741  Assert(this->n_rows == B.n_rows,
742  ExcDimensionMismatch(this->n_rows, B.n_rows));
743  Assert(this->n_columns == C.n_rows,
744  ExcDimensionMismatch(this->n_columns, C.n_rows));
745  Assert(B.n_columns == C.n_columns,
746  ExcDimensionMismatch(B.n_columns, C.n_columns));
747  Assert(this->column_block_size == C.row_block_size,
748  ExcDimensionMismatch(this->column_block_size, C.row_block_size));
749  Assert(this->row_block_size == B.row_block_size,
750  ExcDimensionMismatch(this->row_block_size, B.row_block_size));
751  Assert(B.column_block_size == C.column_block_size,
752  ExcDimensionMismatch(B.column_block_size, C.column_block_size));
753  }
754  else if (!transpose_A && transpose_B)
755  {
756  Assert(this->n_columns == B.n_columns,
757  ExcDimensionMismatch(this->n_columns, B.n_columns));
758  Assert(this->n_rows == C.n_rows,
759  ExcDimensionMismatch(this->n_rows, C.n_rows));
760  Assert(B.n_rows == C.n_columns,
761  ExcDimensionMismatch(B.n_rows, C.n_columns));
762  Assert(this->row_block_size == C.row_block_size,
763  ExcDimensionMismatch(this->row_block_size, C.row_block_size));
764  Assert(this->column_block_size == B.column_block_size,
765  ExcDimensionMismatch(this->column_block_size,
766  B.column_block_size));
767  Assert(B.row_block_size == C.column_block_size,
768  ExcDimensionMismatch(B.row_block_size, C.column_block_size));
769  }
770  else // if (transpose_A && transpose_B)
771  {
772  Assert(this->n_rows == B.n_columns,
773  ExcDimensionMismatch(this->n_rows, B.n_columns));
774  Assert(this->n_columns == C.n_rows,
775  ExcDimensionMismatch(this->n_columns, C.n_rows));
776  Assert(B.n_rows == C.n_columns,
777  ExcDimensionMismatch(B.n_rows, C.n_columns));
778  Assert(this->column_block_size == C.row_block_size,
779  ExcDimensionMismatch(this->column_block_size, C.row_block_size));
780  Assert(this->row_block_size == B.column_block_size,
781  ExcDimensionMismatch(this->row_block_size, B.column_block_size));
782  Assert(B.row_block_size == C.column_block_size,
783  ExcDimensionMismatch(B.row_block_size, C.column_block_size));
784  }
785 
786  if (this->grid->mpi_process_is_active)
787  {
788  char trans_a = transpose_A ? 'T' : 'N';
789  char trans_b = transpose_B ? 'T' : 'N';
790 
791  const NumberType *A_loc =
792  (this->values.size() > 0) ? (&(this->values[0])) : nullptr;
793  const NumberType *B_loc =
794  (B.values.size() > 0) ? (&(B.values[0])) : nullptr;
795  NumberType *C_loc = (C.values.size() > 0) ? (&(C.values[0])) : nullptr;
796  int m = C.n_rows;
797  int n = C.n_columns;
798  int k = transpose_A ? this->n_rows : this->n_columns;
799 
800  pgemm(&trans_a,
801  &trans_b,
802  &m,
803  &n,
804  &k,
805  &b,
806  A_loc,
807  &(this->submatrix_row),
808  &(this->submatrix_column),
809  this->descriptor,
810  B_loc,
811  &B.submatrix_row,
812  &B.submatrix_column,
813  B.descriptor,
814  &c,
815  C_loc,
816  &C.submatrix_row,
817  &C.submatrix_column,
818  C.descriptor);
819  }
820  C.state = LAPACKSupport::State::matrix;
821 }
822 
823 
824 
825 template <typename NumberType>
826 void
827 ScaLAPACKMatrix<NumberType>::mmult(ScaLAPACKMatrix<NumberType> &C,
828  const ScaLAPACKMatrix<NumberType> &B,
829  const bool adding) const
830 {
831  if (adding)
832  mult(1., B, 1., C, false, false);
833  else
834  mult(1., B, 0, C, false, false);
835 }
836 
837 
838 
839 template <typename NumberType>
840 void
841 ScaLAPACKMatrix<NumberType>::Tmmult(ScaLAPACKMatrix<NumberType> &C,
842  const ScaLAPACKMatrix<NumberType> &B,
843  const bool adding) const
844 {
845  if (adding)
846  mult(1., B, 1., C, true, false);
847  else
848  mult(1., B, 0, C, true, false);
849 }
850 
851 
852 
853 template <typename NumberType>
854 void
855 ScaLAPACKMatrix<NumberType>::mTmult(ScaLAPACKMatrix<NumberType> &C,
856  const ScaLAPACKMatrix<NumberType> &B,
857  const bool adding) const
858 {
859  if (adding)
860  mult(1., B, 1., C, false, true);
861  else
862  mult(1., B, 0, C, false, true);
863 }
864 
865 
866 
867 template <typename NumberType>
868 void
869 ScaLAPACKMatrix<NumberType>::TmTmult(ScaLAPACKMatrix<NumberType> &C,
870  const ScaLAPACKMatrix<NumberType> &B,
871  const bool adding) const
872 {
873  if (adding)
874  mult(1., B, 1., C, true, true);
875  else
876  mult(1., B, 0, C, true, true);
877 }
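// The four wrappers above are thin shortcuts around mult(); in matrix terms
// (sketch, with A, B, C distributed on the same grid):
//
//   A.mmult(C, B, false);    // C = A   * B      == mult(1., B, 0., C, false, false)
//   A.Tmmult(C, B, false);   // C = A^T * B
//   A.mTmult(C, B, false);   // C = A   * B^T
//   A.TmTmult(C, B, false);  // C = A^T * B^T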
878 
879 
880 
881 template <typename NumberType>
882 void
883 ScaLAPACKMatrix<NumberType>::compute_cholesky_factorization()
884 {
885  Assert(
886  n_columns == n_rows && property == LAPACKSupport::Property::symmetric,
887  ExcMessage(
888  "Cholesky factorization can be applied to symmetric matrices only."));
889  Assert(state == LAPACKSupport::matrix,
890  ExcMessage(
891  "Matrix has to be in Matrix state before calling this function."));
892 
893  if (grid->mpi_process_is_active)
894  {
895  int info = 0;
896  NumberType *A_loc = &this->values[0];
897  // pdpotrf_(&uplo,&n_columns,A_loc,&submatrix_row,&submatrix_column,descriptor,&info);
898  ppotrf(&uplo,
899  &n_columns,
900  A_loc,
901  &submatrix_row,
902  &submatrix_column,
903  descriptor,
904  &info);
905  AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("ppotrf", info));
906  }
907  state = LAPACKSupport::cholesky;
908  property = (uplo == 'L' ? LAPACKSupport::lower_triangular :
909  LAPACKSupport::upper_triangular);
910 }
911 
912 
913 
914 template <typename NumberType>
915 void
916 ScaLAPACKMatrix<NumberType>::compute_lu_factorization()
917 {
918  Assert(state == LAPACKSupport::matrix,
919  ExcMessage(
920  "Matrix has to be in Matrix state before calling this function."));
921 
922  if (grid->mpi_process_is_active)
923  {
924  int info = 0;
925  NumberType *A_loc = &this->values[0];
926 
927  const int iarow = indxg2p_(&submatrix_row,
928  &row_block_size,
929  &(grid->this_process_row),
930  &first_process_row,
931  &(grid->n_process_rows));
932  const int mp = numroc_(&n_rows,
933  &row_block_size,
934  &(grid->this_process_row),
935  &iarow,
936  &(grid->n_process_rows));
937  ipiv.resize(mp + row_block_size);
938 
939  pgetrf(&n_rows,
940  &n_columns,
941  A_loc,
942  &submatrix_row,
943  &submatrix_column,
944  descriptor,
945  ipiv.data(),
946  &info);
947  AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("pgetrf", info));
948  }
949  state = LAPACKSupport::State::lu;
950  property = LAPACKSupport::Property::general;
951 }
952 
953 
954 
955 template <typename NumberType>
956 void
957 ScaLAPACKMatrix<NumberType>::invert()
958 {
959  // Check whether matrix is symmetric and save flag.
960  // If a Cholesky factorization has been applied previously,
961  // the original matrix was symmetric.
962  const bool is_symmetric = (property == LAPACKSupport::symmetric ||
963  state == LAPACKSupport::State::cholesky);
964 
965  // Check whether matrix is triangular and is in an unfactorized state.
966  const bool is_triangular = (property == LAPACKSupport::upper_triangular ||
967  property == LAPACKSupport::lower_triangular) &&
968  (state == LAPACKSupport::State::matrix ||
969  state == LAPACKSupport::State::inverse_matrix);
970 
971  if (is_triangular)
972  {
973  if (grid->mpi_process_is_active)
974  {
975  const char uploTriangular =
976  property == LAPACKSupport::upper_triangular ? 'U' : 'L';
977  const char diag = 'N';
978  int info = 0;
979  NumberType *A_loc = &this->values[0];
980  ptrtri(&uploTriangular,
981  &diag,
982  &n_columns,
983  A_loc,
984  &submatrix_row,
985  &submatrix_column,
986  descriptor,
987  &info);
988  AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("ptrtri", info));
989  // The inversion is stored in the same part as the triangular matrix,
990  // so we don't need to re-set the property here.
991  }
992  }
993  else
994  {
995  // Matrix is neither in Cholesky nor LU state.
996  // Compute the required factorizations based on the property of the
997  // matrix.
998  if (!(state == LAPACKSupport::State::lu ||
999  state == LAPACKSupport::State::cholesky))
1000  {
1001  if (is_symmetric)
1002  compute_cholesky_factorization();
1003  else
1004  compute_lu_factorization();
1005  }
1006  if (grid->mpi_process_is_active)
1007  {
1008  int info = 0;
1009  NumberType *A_loc = &this->values[0];
1010 
1011  if (is_symmetric)
1012  {
1013  ppotri(&uplo,
1014  &n_columns,
1015  A_loc,
1016  &submatrix_row,
1017  &submatrix_column,
1018  descriptor,
1019  &info);
1020  AssertThrow(info == 0,
1021  LAPACKSupport::ExcErrorCode("ppotri", info));
1022  property = LAPACKSupport::Property::symmetric;
1023  }
1024  else
1025  {
1026  int lwork = -1, liwork = -1;
1027  work.resize(1);
1028  iwork.resize(1);
1029 
1030  pgetri(&n_columns,
1031  A_loc,
1032  &submatrix_row,
1033  &submatrix_column,
1034  descriptor,
1035  ipiv.data(),
1036  work.data(),
1037  &lwork,
1038  iwork.data(),
1039  &liwork,
1040  &info);
1041 
1042  AssertThrow(info == 0,
1043  LAPACKSupport::ExcErrorCode("pgetri", info));
1044  lwork = work[0];
1045  liwork = iwork[0];
1046  work.resize(lwork);
1047  iwork.resize(liwork);
1048 
1049  pgetri(&n_columns,
1050  A_loc,
1051  &submatrix_row,
1052  &submatrix_column,
1053  descriptor,
1054  ipiv.data(),
1055  work.data(),
1056  &lwork,
1057  iwork.data(),
1058  &liwork,
1059  &info);
1060 
1061  AssertThrow(info == 0,
1062  LAPACKSupport::ExcErrorCode("pgetri", info));
1063  }
1064  }
1065  }
1066  state = LAPACKSupport::State::inverse_matrix;
1067 }
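// invert() dispatches on the matrix property/state: triangular matrices use
// ptrtri, symmetric ones Cholesky + ppotri, everything else LU + pgetri,
// factorizing first if that has not happened yet. A typical call sequence
// (sketch) for a symmetric positive definite matrix:
//
//   A.set_property(LAPACKSupport::Property::symmetric);
//   A.invert();   // internally runs compute_cholesky_factorization() first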
1068 
1069 
1070 
1071 template <typename NumberType>
1072 std::vector<NumberType>
1073 ScaLAPACKMatrix<NumberType>::eigenpairs_symmetric_by_index(
1074  const std::pair<unsigned int, unsigned int> &index_limits,
1075  const bool compute_eigenvectors)
1076 {
1077  // check validity of index limits
1078  Assert(index_limits.first < (unsigned int)n_rows,
1079  ExcIndexRange(index_limits.first, 0, n_rows));
1080  Assert(index_limits.second < (unsigned int)n_rows,
1081  ExcIndexRange(index_limits.second, 0, n_rows));
1082 
1083  std::pair<unsigned int, unsigned int> idx =
1084  std::make_pair(std::min(index_limits.first, index_limits.second),
1085  std::max(index_limits.first, index_limits.second));
1086 
1087  // compute all eigenvalues/eigenvectors
1088  if (idx.first == 0 && idx.second == (unsigned int)n_rows - 1)
1089  return eigenpairs_symmetric(compute_eigenvectors);
1090  else
1091  return eigenpairs_symmetric(compute_eigenvectors, idx);
1092 }
1093 
1094 
1095 
1096 template <typename NumberType>
1097 std::vector<NumberType>
1098 ScaLAPACKMatrix<NumberType>::eigenpairs_symmetric_by_value(
1099  const std::pair<NumberType, NumberType> &value_limits,
1100  const bool compute_eigenvectors)
1101 {
1102  Assert(!std::isnan(value_limits.first),
1103  ExcMessage("value_limits.first is NaN"));
1104  Assert(!std::isnan(value_limits.second),
1105  ExcMessage("value_limits.second is NaN"));
1106 
1107  std::pair<unsigned int, unsigned int> indices =
1108  std::make_pair(numbers::invalid_unsigned_int,
1109  numbers::invalid_unsigned_int);
1110 
1111  return eigenpairs_symmetric(compute_eigenvectors, indices, value_limits);
1112 }
1113 
1114 
1115 
1116 template <typename NumberType>
1117 std::vector<NumberType>
1118 ScaLAPACKMatrix<NumberType>::eigenpairs_symmetric(
1119  const bool compute_eigenvectors,
1120  const std::pair<unsigned int, unsigned int> &eigenvalue_idx,
1121  const std::pair<NumberType, NumberType> & eigenvalue_limits)
1122 {
1123  Assert(state == LAPACKSupport::matrix,
1124  ExcMessage(
1125  "Matrix has to be in Matrix state before calling this function."));
1126  Assert(property == LAPACKSupport::symmetric,
1127  ExcMessage("Matrix has to be symmetric for this operation."));
1128 
1130 
1131  const bool use_values = (std::isnan(eigenvalue_limits.first) ||
1132  std::isnan(eigenvalue_limits.second)) ?
1133  false :
1134  true;
1135  const bool use_indices =
1136  ((eigenvalue_idx.first == numbers::invalid_unsigned_int) ||
1137  (eigenvalue_idx.second == numbers::invalid_unsigned_int)) ?
1138  false :
1139  true;
1140 
1141  Assert(
1142  !(use_values && use_indices),
1143  ExcMessage(
1144  "Prescribing both the index and value range for the eigenvalues is ambiguous"));
1145 
1146  // if computation of eigenvectors is not required use a sufficiently small
1147  // distributed matrix
1148  std::unique_ptr<ScaLAPACKMatrix<NumberType>> eigenvectors =
1149  compute_eigenvectors ?
1150  std_cxx14::make_unique<ScaLAPACKMatrix<NumberType>>(n_rows,
1151  grid,
1152  row_block_size) :
1153  std_cxx14::make_unique<ScaLAPACKMatrix<NumberType>>(
1154  grid->n_process_rows, grid->n_process_columns, grid, 1, 1);
1155 
1156  eigenvectors->property = property;
1157  // number of eigenvalues to be returned from psyevx; upon successful exit ev
1158  // contains the m selected eigenvalues in ascending order; set to all
1159  // eigenvalues in case we will be using psyev.
1160  int m = n_rows;
1161  std::vector<NumberType> ev(n_rows);
1162 
1163  if (grid->mpi_process_is_active)
1164  {
1165  int info = 0;
1166  /*
1167  * for jobz==N only eigenvalues are computed, for jobz='V' also the
1168  * eigenvectors of the matrix are computed
1169  */
1170  char jobz = compute_eigenvectors ? 'V' : 'N';
1171  char range = 'A';
1172  // default value is to compute all eigenvalues and optionally eigenvectors
1173  bool all_eigenpairs = true;
1174  NumberType vl = NumberType(), vu = NumberType();
1175  int il = 1, iu = 1;
1176  // number of eigenvectors to be returned;
1177  // upon successful exit the first m=nz columns contain the selected
1178  // eigenvectors (only if jobz=='V')
1179  int nz = 0;
1180  NumberType abstol = NumberType();
1181 
1182  // orfac decides which eigenvectors should be reorthogonalized
1183  // see
1184  // http://www.netlib.org/scalapack/explore-html/df/d1a/pdsyevx_8f_source.html
1185  // for explanation; to keep it simple, no reorthogonalization will be done by
1186  // setting orfac to 0
1187  NumberType orfac = 0;
1188  // contains the indices of eigenvectors that failed to converge
1189  std::vector<int> ifail;
1190  // This array contains indices of eigenvectors corresponding to
1191  // a cluster of eigenvalues that could not be reorthogonalized
1192  // due to insufficient workspace
1193  // see
1194  // http://www.netlib.org/scalapack/explore-html/df/d1a/pdsyevx_8f_source.html
1195  // for explanation
1196  std::vector<int> iclustr;
1197  // This array contains the gap between eigenvalues whose
1198  // eigenvectors could not be reorthogonalized.
1199  // see
1200  // http://www.netlib.org/scalapack/explore-html/df/d1a/pdsyevx_8f_source.html
1201  // for explanation
1202  std::vector<NumberType> gap(n_local_rows * n_local_columns);
1203 
1204  // index range for eigenvalues is not specified
1205  if (!use_indices)
1206  {
1207  // interval for eigenvalues is not specified and consequently all
1208  // eigenvalues/eigenpairs will be computed
1209  if (!use_values)
1210  {
1211  range = 'A';
1212  all_eigenpairs = true;
1213  }
1214  else
1215  {
1216  range = 'V';
1217  all_eigenpairs = false;
1218  vl = std::min(eigenvalue_limits.first, eigenvalue_limits.second);
1219  vu = std::max(eigenvalue_limits.first, eigenvalue_limits.second);
1220  }
1221  }
1222  else
1223  {
1224  range = 'I';
1225  all_eigenpairs = false;
1226  // as Fortran starts counting/indexing from 1 unlike C/C++, where it
1227  // starts from 0
1228  il = std::min(eigenvalue_idx.first, eigenvalue_idx.second) + 1;
1229  iu = std::max(eigenvalue_idx.first, eigenvalue_idx.second) + 1;
1230  }
1231  NumberType *A_loc = &this->values[0];
1232  /*
1233  * by setting lwork to -1 a workspace query for optimal length of work is
1234  * performed
1235  */
1236  int lwork = -1;
1237  int liwork = -1;
1238  NumberType *eigenvectors_loc =
1239  (compute_eigenvectors ? &eigenvectors->values[0] : nullptr);
1240  work.resize(1);
1241  iwork.resize(1);
1242 
1243  if (all_eigenpairs)
1244  {
1245  psyev(&jobz,
1246  &uplo,
1247  &n_rows,
1248  A_loc,
1249  &submatrix_row,
1250  &submatrix_column,
1251  descriptor,
1252  &ev[0],
1253  eigenvectors_loc,
1254  &eigenvectors->submatrix_row,
1255  &eigenvectors->submatrix_column,
1256  eigenvectors->descriptor,
1257  &work[0],
1258  &lwork,
1259  &info);
1260  AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("psyev", info));
1261  }
1262  else
1263  {
1264  char cmach = compute_eigenvectors ? 'U' : 'S';
1265  plamch(&(this->grid->blacs_context), &cmach, abstol);
1266  abstol *= 2;
1267  ifail.resize(n_rows);
1268  iclustr.resize(2 * grid->n_process_rows * grid->n_process_columns);
1269  gap.resize(grid->n_process_rows * grid->n_process_columns);
1270 
1271  psyevx(&jobz,
1272  &range,
1273  &uplo,
1274  &n_rows,
1275  A_loc,
1276  &submatrix_row,
1277  &submatrix_column,
1278  descriptor,
1279  &vl,
1280  &vu,
1281  &il,
1282  &iu,
1283  &abstol,
1284  &m,
1285  &nz,
1286  &ev[0],
1287  &orfac,
1288  eigenvectors_loc,
1289  &eigenvectors->submatrix_row,
1290  &eigenvectors->submatrix_column,
1291  eigenvectors->descriptor,
1292  &work[0],
1293  &lwork,
1294  &iwork[0],
1295  &liwork,
1296  &ifail[0],
1297  &iclustr[0],
1298  &gap[0],
1299  &info);
1300  AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("psyevx", info));
1301  }
1302  lwork = work[0];
1303  work.resize(lwork);
1304 
1305  if (all_eigenpairs)
1306  {
1307  psyev(&jobz,
1308  &uplo,
1309  &n_rows,
1310  A_loc,
1311  &submatrix_row,
1312  &submatrix_column,
1313  descriptor,
1314  &ev[0],
1315  eigenvectors_loc,
1316  &eigenvectors->submatrix_row,
1317  &eigenvectors->submatrix_column,
1318  eigenvectors->descriptor,
1319  &work[0],
1320  &lwork,
1321  &info);
1322 
1323  AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("psyev", info));
1324  }
1325  else
1326  {
1327  liwork = iwork[0];
1328  AssertThrow(liwork > 0, ExcInternalError());
1329  iwork.resize(liwork);
1330 
1331  psyevx(&jobz,
1332  &range,
1333  &uplo,
1334  &n_rows,
1335  A_loc,
1336  &submatrix_row,
1337  &submatrix_column,
1338  descriptor,
1339  &vl,
1340  &vu,
1341  &il,
1342  &iu,
1343  &abstol,
1344  &m,
1345  &nz,
1346  &ev[0],
1347  &orfac,
1348  eigenvectors_loc,
1349  &eigenvectors->submatrix_row,
1350  &eigenvectors->submatrix_column,
1351  eigenvectors->descriptor,
1352  &work[0],
1353  &lwork,
1354  &iwork[0],
1355  &liwork,
1356  &ifail[0],
1357  &iclustr[0],
1358  &gap[0],
1359  &info);
1360 
1361  AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("psyevx", info));
1362  }
1363  // if eigenvectors are queried copy eigenvectors to original matrix
1364  // as the temporary matrix eigenvectors has identical dimensions and
1365  // block-cyclic distribution we simply swap the local array
1366  if (compute_eigenvectors)
1367  this->values.swap(eigenvectors->values);
1368 
1369  // adapt the size of ev to fit m upon return
1370  while ((int)ev.size() > m)
1371  ev.pop_back();
1372  }
1373  /*
1374  * send number of computed eigenvalues to inactive processes
1375  */
1376  grid->send_to_inactive(&m, 1);
1377 
1378  /*
1379  * inactive processes have to resize array of eigenvalues
1380  */
1381  if (!grid->mpi_process_is_active)
1382  ev.resize(m);
1383  /*
1384  * send the eigenvalues to processors not being part of the process grid
1385  */
1386  grid->send_to_inactive(ev.data(), ev.size());
1387 
1388  /*
1389  * if only eigenvalues are queried the content of the matrix will be destroyed
1390  * if the eigenpairs are queried matrix A on exit stores the eigenvectors in
1391  * the columns
1392  */
1393  if (compute_eigenvectors)
1394  {
1395  property = LAPACKSupport::Property::general;
1396  state = LAPACKSupport::eigenvalues;
1397  }
1398  else
1399  state = LAPACKSupport::unusable;
1400 
1401  return ev;
1402 }
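// Both psyev and psyevx above follow the usual ScaLAPACK workspace-query
// protocol, which is why each routine is called twice: first with lwork == -1
// so that work[0] (and iwork[0]) return the optimal sizes, then again with the
// resized buffers to do the actual computation (sketch):
//
//   int lwork = -1;
//   work.resize(1);
//   psyev(..., work.data(), &lwork, &info);   // workspace query only
//   lwork = static_cast<int>(work[0]);
//   work.resize(lwork);
//   psyev(..., work.data(), &lwork, &info);   // actual solve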
1403 
1404 
1405 
1406 template <typename NumberType>
1407 std::vector<NumberType>
1408 ScaLAPACKMatrix<NumberType>::eigenpairs_symmetric_by_index_MRRR(
1409  const std::pair<unsigned int, unsigned int> &index_limits,
1410  const bool compute_eigenvectors)
1411 {
1412  // Check validity of index limits.
1413  AssertIndexRange(index_limits.first, static_cast<unsigned int>(n_rows));
1414  AssertIndexRange(index_limits.second, static_cast<unsigned int>(n_rows));
1415 
1416  const std::pair<unsigned int, unsigned int> idx =
1417  std::make_pair(std::min(index_limits.first, index_limits.second),
1418  std::max(index_limits.first, index_limits.second));
1419 
1420  // Compute all eigenvalues/eigenvectors.
1421  if (idx.first == 0 && idx.second == static_cast<unsigned int>(n_rows - 1))
1422  return eigenpairs_symmetric_MRRR(compute_eigenvectors);
1423  else
1424  return eigenpairs_symmetric_MRRR(compute_eigenvectors, idx);
1425 }
1426 
1427 
1428 
1429 template <typename NumberType>
1430 std::vector<NumberType>
1431 ScaLAPACKMatrix<NumberType>::eigenpairs_symmetric_by_value_MRRR(
1432  const std::pair<NumberType, NumberType> &value_limits,
1433  const bool compute_eigenvectors)
1434 {
1435  AssertIsFinite(value_limits.first);
1436  AssertIsFinite(value_limits.second);
1437 
1438  const std::pair<unsigned int, unsigned int> indices =
1439  std::make_pair(numbers::invalid_unsigned_int,
1440  numbers::invalid_unsigned_int);
1441 
1442  return eigenpairs_symmetric_MRRR(compute_eigenvectors, indices, value_limits);
1443 }
1444 
1445 
1446 
1447 template <typename NumberType>
1448 std::vector<NumberType>
1449 ScaLAPACKMatrix<NumberType>::eigenpairs_symmetric_MRRR(
1450  const bool compute_eigenvectors,
1451  const std::pair<unsigned int, unsigned int> &eigenvalue_idx,
1452  const std::pair<NumberType, NumberType> & eigenvalue_limits)
1453 {
1454  Assert(state == LAPACKSupport::matrix,
1455  ExcMessage(
1456  "Matrix has to be in Matrix state before calling this function."));
1457  Assert(property == LAPACKSupport::symmetric,
1458  ExcMessage("Matrix has to be symmetric for this operation."));
1459 
1461 
1462  const bool use_values = (std::isnan(eigenvalue_limits.first) ||
1463  std::isnan(eigenvalue_limits.second)) ?
1464  false :
1465  true;
1466  const bool use_indices =
1467  ((eigenvalue_idx.first == numbers::invalid_unsigned_int) ||
1468  (eigenvalue_idx.second == numbers::invalid_unsigned_int)) ?
1469  false :
1470  true;
1471 
1472  Assert(
1473  !(use_values && use_indices),
1474  ExcMessage(
1475  "Prescribing both the index and value range for the eigenvalues is ambiguous"));
1476 
1477  // If computation of eigenvectors is not required, use a sufficiently small
1478  // distributed matrix.
1479  std::unique_ptr<ScaLAPACKMatrix<NumberType>> eigenvectors =
1480  compute_eigenvectors ?
1481  std_cxx14::make_unique<ScaLAPACKMatrix<NumberType>>(n_rows,
1482  grid,
1483  row_block_size) :
1484  std_cxx14::make_unique<ScaLAPACKMatrix<NumberType>>(
1485  grid->n_process_rows, grid->n_process_columns, grid, 1, 1);
1486 
1487  eigenvectors->property = property;
1488  // Number of eigenvalues to be returned from psyevr; upon successful exit ev
1489  // contains the m selected eigenvalues in ascending order.
1490  int m = n_rows;
1491  std::vector<NumberType> ev(n_rows);
1492 
1493  // Number of eigenvectors to be returned;
1494  // Upon successful exit the first m=nz columns contain the selected
1495  // eigenvectors (only if jobz=='V').
1496  int nz = 0;
1497 
1498  if (grid->mpi_process_is_active)
1499  {
1500  int info = 0;
1501  /*
1502  * For jobz==N only eigenvalues are computed, for jobz='V' also the
1503  * eigenvectors of the matrix are computed.
1504  */
1505  char jobz = compute_eigenvectors ? 'V' : 'N';
1506  // Default value is to compute all eigenvalues and optionally
1507  // eigenvectors.
1508  char range = 'A';
1509  NumberType vl = NumberType(), vu = NumberType();
1510  int il = 1, iu = 1;
1511 
1512  // Index range for eigenvalues is not specified.
1513  if (!use_indices)
1514  {
1515  // Interval for eigenvalues is not specified and consequently all
1516  // eigenvalues/eigenpairs will be computed.
1517  if (!use_values)
1518  {
1519  range = 'A';
1520  }
1521  else
1522  {
1523  range = 'V';
1524  vl = std::min(eigenvalue_limits.first, eigenvalue_limits.second);
1525  vu = std::max(eigenvalue_limits.first, eigenvalue_limits.second);
1526  }
1527  }
1528  else
1529  {
1530  range = 'I';
1531  // As Fortran starts counting/indexing from 1 unlike C/C++, where it
1532  // starts from 0.
1533  il = std::min(eigenvalue_idx.first, eigenvalue_idx.second) + 1;
1534  iu = std::max(eigenvalue_idx.first, eigenvalue_idx.second) + 1;
1535  }
1536  NumberType *A_loc = &this->values[0];
1537 
1538  /*
1539  * By setting lwork to -1 a workspace query for optimal length of work is
1540  * performed.
1541  */
1542  int lwork = -1;
1543  int liwork = -1;
1544  NumberType *eigenvectors_loc =
1545  (compute_eigenvectors ? &eigenvectors->values[0] : nullptr);
1546  work.resize(1);
1547  iwork.resize(1);
1548 
1549  psyevr(&jobz,
1550  &range,
1551  &uplo,
1552  &n_rows,
1553  A_loc,
1554  &submatrix_row,
1555  &submatrix_column,
1556  descriptor,
1557  &vl,
1558  &vu,
1559  &il,
1560  &iu,
1561  &m,
1562  &nz,
1563  ev.data(),
1564  eigenvectors_loc,
1565  &eigenvectors->submatrix_row,
1566  &eigenvectors->submatrix_column,
1567  eigenvectors->descriptor,
1568  work.data(),
1569  &lwork,
1570  iwork.data(),
1571  &liwork,
1572  &info);
1573 
1574  AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("psyevr", info));
1575 
1576  lwork = work[0];
1577  work.resize(lwork);
1578  liwork = iwork[0];
1579  iwork.resize(liwork);
1580 
1581  psyevr(&jobz,
1582  &range,
1583  &uplo,
1584  &n_rows,
1585  A_loc,
1586  &submatrix_row,
1587  &submatrix_column,
1588  descriptor,
1589  &vl,
1590  &vu,
1591  &il,
1592  &iu,
1593  &m,
1594  &nz,
1595  ev.data(),
1596  eigenvectors_loc,
1597  &eigenvectors->submatrix_row,
1598  &eigenvectors->submatrix_column,
1599  eigenvectors->descriptor,
1600  work.data(),
1601  &lwork,
1602  iwork.data(),
1603  &liwork,
1604  &info);
1605 
1606  AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("psyevr", info));
1607 
1608  if (compute_eigenvectors)
1609  AssertThrow(
1610  m == nz,
1611  ExcMessage(
1612  "psyevr failed to compute all eigenvectors for the selected eigenvalues"));
1613 
1614  // If eigenvectors are queried, copy eigenvectors to original matrix.
1615  // As the temporary matrix eigenvectors has identical dimensions and
1616  // block-cyclic distribution we simply swap the local array.
1617  if (compute_eigenvectors)
1618  this->values.swap(eigenvectors->values);
1619 
1620  // Adapt the size of ev to fit m upon return.
1621  while ((int)ev.size() > m)
1622  ev.pop_back();
1623  }
1624  /*
1625  * Send number of computed eigenvalues to inactive processes.
1626  */
1627  grid->send_to_inactive(&m, 1);
1628 
1629  /*
1630  * Inactive processes have to resize array of eigenvalues.
1631  */
1632  if (!grid->mpi_process_is_active)
1633  ev.resize(m);
1634  /*
1635  * Send the eigenvalues to processors not being part of the process grid.
1636  */
1637  grid->send_to_inactive(ev.data(), ev.size());
1638 
1639  /*
1640  * If only eigenvalues are queried, the content of the matrix will be
1641  * destroyed. If the eigenpairs are queried, matrix A on exit stores the
1642  * eigenvectors in the columns.
1643  */
1644  if (compute_eigenvectors)
1645  {
1646  property = LAPACKSupport::Property::general;
1647  state = LAPACKSupport::eigenvalues;
1648  }
1649  else
1650  state = LAPACKSupport::unusable;
1651 
1652  return ev;
1653 }
1654 
1655 
1656 
1657 template <typename NumberType>
1658 std::vector<NumberType>
1659 ScaLAPACKMatrix<NumberType>::compute_SVD(ScaLAPACKMatrix<NumberType> *U,
1660  ScaLAPACKMatrix<NumberType> *VT)
1661 {
1662  Assert(state == LAPACKSupport::matrix,
1663  ExcMessage(
1664  "Matrix has to be in Matrix state before calling this function."));
1665  Assert(row_block_size == column_block_size,
1666  ExcDimensionMismatch(row_block_size, column_block_size));
1667 
1668  const bool left_singluar_vectors = (U != nullptr) ? true : false;
1669  const bool right_singluar_vectors = (VT != nullptr) ? true : false;
1670 
1671  if (left_singluar_vectors)
1672  {
1673  Assert(n_rows == U->n_rows, ExcDimensionMismatch(n_rows, U->n_rows));
1674  Assert(U->n_rows == U->n_columns,
1675  ExcDimensionMismatch(U->n_rows, U->n_columns));
1676  Assert(row_block_size == U->row_block_size,
1677  ExcDimensionMismatch(row_block_size, U->row_block_size));
1678  Assert(column_block_size == U->column_block_size,
1679  ExcDimensionMismatch(column_block_size, U->column_block_size));
1680  Assert(grid->blacs_context == U->grid->blacs_context,
1681  ExcDimensionMismatch(grid->blacs_context, U->grid->blacs_context));
1682  }
1683  if (right_singluar_vectors)
1684  {
1685  Assert(n_columns == VT->n_rows,
1686  ExcDimensionMismatch(n_columns, VT->n_rows));
1687  Assert(VT->n_rows == VT->n_columns,
1688  ExcDimensionMismatch(VT->n_rows, VT->n_columns));
1689  Assert(row_block_size == VT->row_block_size,
1690  ExcDimensionMismatch(row_block_size, VT->row_block_size));
1691  Assert(column_block_size == VT->column_block_size,
1692  ExcDimensionMismatch(column_block_size, VT->column_block_size));
1693  Assert(grid->blacs_context == VT->grid->blacs_context,
1694  ExcDimensionMismatch(grid->blacs_context,
1695  VT->grid->blacs_context));
1696  }
1698 
1699  std::vector<NumberType> sv(std::min(n_rows, n_columns));
1700 
1701  if (grid->mpi_process_is_active)
1702  {
1703  char jobu = left_singluar_vectors ? 'V' : 'N';
1704  char jobvt = right_singluar_vectors ? 'V' : 'N';
1705  NumberType *A_loc = &this->values[0];
1706  NumberType *U_loc = left_singluar_vectors ? &(U->values[0]) : nullptr;
1707  NumberType *VT_loc = right_singluar_vectors ? &(VT->values[0]) : nullptr;
1708  int info = 0;
1709  /*
1710  * by setting lwork to -1 a workspace query for optimal length of work is
1711  * performed
1712  */
1713  int lwork = -1;
1714  work.resize(1);
1715 
1716  pgesvd(&jobu,
1717  &jobvt,
1718  &n_rows,
1719  &n_columns,
1720  A_loc,
1721  &submatrix_row,
1722  &submatrix_column,
1723  descriptor,
1724  &*sv.begin(),
1725  U_loc,
1726  &U->submatrix_row,
1727  &U->submatrix_column,
1728  U->descriptor,
1729  VT_loc,
1730  &VT->submatrix_row,
1731  &VT->submatrix_column,
1732  VT->descriptor,
1733  &work[0],
1734  &lwork,
1735  &info);
1736  AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("pgesvd", info));
1737 
1738  lwork = work[0];
1739  work.resize(lwork);
1740 
1741  pgesvd(&jobu,
1742  &jobvt,
1743  &n_rows,
1744  &n_columns,
1745  A_loc,
1746  &submatrix_row,
1747  &submatrix_column,
1748  descriptor,
1749  &*sv.begin(),
1750  U_loc,
1751  &U->submatrix_row,
1752  &U->submatrix_column,
1753  U->descriptor,
1754  VT_loc,
1755  &VT->submatrix_row,
1756  &VT->submatrix_column,
1757  VT->descriptor,
1758  &work[0],
1759  &lwork,
1760  &info);
1761  AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("pgesvd", info));
1762  }
1763 
1764  /*
1765  * send the singular values to processors not being part of the process grid
1766  */
1767  grid->send_to_inactive(sv.data(), sv.size());
1768 
1769  property = LAPACKSupport::Property::general;
1770  state = LAPACKSupport::State::unusable;
1771 
1772  return sv;
1773 }
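// Usage sketch for the SVD above (block size nb and the grid are illustrative):
// for an m x n matrix A, U must be m x m and VT must be n x n, with identical
// square block sizes and the same BLACS context as A.
//
//   ScaLAPACKMatrix<double> U(m, m, grid, nb, nb);
//   ScaLAPACKMatrix<double> VT(n, n, grid, nb, nb);
//   const std::vector<double> sigma = A.compute_SVD(&U, &VT); // A is overwritten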
1774 
1775 
1776 
1777 template <typename NumberType>
1778 void
1779 ScaLAPACKMatrix<NumberType>::least_squares(ScaLAPACKMatrix<NumberType> &B,
1780  const bool transpose)
1781 {
1782  Assert(grid == B.grid,
1783  ExcMessage("The matrices A and B need to have the same process grid"));
1784  Assert(state == LAPACKSupport::matrix,
1785  ExcMessage(
1786  "Matrix has to be in Matrix state before calling this function."));
1787  Assert(B.state == LAPACKSupport::matrix,
1788  ExcMessage(
1789  "Matrix B has to be in Matrix state before calling this function."));
1790 
1791  if (transpose)
1792  {
1793  Assert(n_columns == B.n_rows, ExcDimensionMismatch(n_columns, B.n_rows));
1794  }
1795  else
1796  {
1797  Assert(n_rows == B.n_rows, ExcDimensionMismatch(n_rows, B.n_rows));
1798  }
1799 
1800  // see
1801  // https://www.ibm.com/support/knowledgecenter/en/SSNR5K_4.2.0/com.ibm.cluster.pessl.v4r2.pssl100.doc/am6gr_lgels.htm
1802  Assert(row_block_size == column_block_size,
1803  ExcMessage(
1804  "Use identical block sizes for rows and columns of matrix A"));
1805  Assert(B.row_block_size == B.column_block_size,
1806  ExcMessage(
1807  "Use identical block sizes for rows and columns of matrix B"));
1808  Assert(row_block_size == B.row_block_size,
1809  ExcMessage(
1810  "Use identical block-cyclic distribution for matrices A and B"));
1811 
1813 
1814  if (grid->mpi_process_is_active)
1815  {
1816  char trans = transpose ? 'T' : 'N';
1817  NumberType *A_loc = &this->values[0];
1818  NumberType *B_loc = &B.values[0];
1819  int info = 0;
1820  /*
1821  * by setting lwork to -1 a workspace query for optimal length of work is
1822  * performed
1823  */
1824  int lwork = -1;
1825  work.resize(1);
1826 
1827  pgels(&trans,
1828  &n_rows,
1829  &n_columns,
1830  &B.n_columns,
1831  A_loc,
1832  &submatrix_row,
1833  &submatrix_column,
1834  descriptor,
1835  B_loc,
1836  &B.submatrix_row,
1837  &B.submatrix_column,
1838  B.descriptor,
1839  &work[0],
1840  &lwork,
1841  &info);
1842  AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("pgels", info));
1843 
1844  lwork = work[0];
1845  work.resize(lwork);
1846 
1847  pgels(&trans,
1848  &n_rows,
1849  &n_columns,
1850  &B.n_columns,
1851  A_loc,
1852  &submatrix_row,
1853  &submatrix_column,
1854  descriptor,
1855  B_loc,
1856  &B.submatrix_row,
1857  &B.submatrix_column,
1858  B.descriptor,
1859  &work[0],
1860  &lwork,
1861  &info);
1862  AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("pgels", info));
1863  }
1864  state = LAPACKSupport::State::unusable;
1865 }
1866 
1867 
1868 
1869 template <typename NumberType>
1870 unsigned int
1871 ScaLAPACKMatrix<NumberType>::pseudoinverse(const NumberType ratio)
1872 {
1873  Assert(state == LAPACKSupport::matrix,
1874  ExcMessage(
1875  "Matrix has to be in Matrix state before calling this function."));
1876  Assert(row_block_size == column_block_size,
1877  ExcMessage(
1878  "Use identical block sizes for rows and columns of matrix A"));
1879  Assert(
1880  ratio > 0. && ratio < 1.,
1881  ExcMessage(
1882  "input parameter ratio has to be larger than zero and smaller than 1"));
1883 
1885  n_rows,
1886  grid,
1891  n_columns,
1892  grid,
1896  std::vector<NumberType> sv = this->compute_SVD(&U, &VT);
1897  AssertThrow(sv[0] > std::numeric_limits<NumberType>::min(),
1898  ExcMessage("Matrix has rank 0"));
1899 
1900  // Get the number of singular values fulfilling the following: sv[i] > sv[0] *
1901  // ratio. Obviously, the 0-th element already satisfies sv[0] > sv[0] * ratio. The
1902  // singular values in sv are ordered by descending value, so we break out of
1903  // the loop if a singular value is smaller than sv[0] * ratio.
1904  unsigned int n_sv = 1;
1905  std::vector<NumberType> inv_sigma;
1906  inv_sigma.push_back(1 / sv[0]);
1907 
1908  for (unsigned int i = 1; i < sv.size(); ++i)
1909  if (sv[i] > sv[0] * ratio)
1910  {
1911  ++n_sv;
1912  inv_sigma.push_back(1 / sv[i]);
1913  }
1914  else
1915  break;
1916 
1917  // For the matrix multiplication we use only the columns of U and rows of VT
1918  // which are associated with singular values larger than the limit. That saves
1919  // computational time for matrices with rank significantly smaller than
1920  // min(n_rows,n_columns)
1921  ScaLAPACKMatrix<NumberType> U_R(n_rows,
1922  n_sv,
1923  grid,
1924  row_block_size,
1925  row_block_size,
1926  LAPACKSupport::Property::general);
1927  ScaLAPACKMatrix<NumberType> VT_R(n_sv,
1928  n_columns,
1929  grid,
1930  row_block_size,
1931  row_block_size,
1932  LAPACKSupport::Property::general);
1933  U.copy_to(U_R,
1934  std::make_pair(0, 0),
1935  std::make_pair(0, 0),
1936  std::make_pair(n_rows, n_sv));
1937  VT.copy_to(VT_R,
1938  std::make_pair(0, 0),
1939  std::make_pair(0, 0),
1940  std::make_pair(n_sv, n_columns));
1941 
1942  VT_R.scale_rows(inv_sigma);
1943  this->reinit(n_columns,
1944  n_rows,
1945  this->grid,
1946  row_block_size,
1947  column_block_size,
1948  LAPACKSupport::Property::general);
1949  VT_R.mult(1, U_R, 0, *this, true, true);
1950  state = LAPACKSupport::State::inverse_matrix;
1951  return n_sv;
1952 }
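// Worked example for the truncation criterion sv[i] > sv[0] * ratio used
// above (numbers invented for illustration): with singular values
// {10, 1, 1e-3, 1e-9} and ratio = 1e-6, the threshold is 1e-5, so the first
// three values are kept and the loop stops at 1e-9; the pseudoinverse is then
// built from n_sv = 3 singular triplets.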
1953 
1954 
1955 
1956 template <typename NumberType>
1957 NumberType
1958 ScaLAPACKMatrix<NumberType>::reciprocal_condition_number(
1959  const NumberType a_norm) const
1960 {
1961  Assert(state == LAPACKSupport::cholesky,
1962  ExcMessage(
1963  "Matrix has to be in Cholesky state before calling this function."));
1965  NumberType rcond = 0.;
1966 
1967  if (grid->mpi_process_is_active)
1968  {
1969  int liwork = n_local_rows;
1970  iwork.resize(liwork);
1971 
1972  int info = 0;
1973  const NumberType *A_loc = &this->values[0];
1974 
1975  // by setting lwork to -1 a workspace query for optimal length of work is
1976  // performed
1977  int lwork = -1;
1978  work.resize(1);
1979  ppocon(&uplo,
1980  &n_columns,
1981  A_loc,
1982  &submatrix_row,
1983  &submatrix_column,
1984  descriptor,
1985  &a_norm,
1986  &rcond,
1987  &work[0],
1988  &lwork,
1989  &iwork[0],
1990  &liwork,
1991  &info);
1992  AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("pdpocon", info));
1993  lwork = std::ceil(work[0]);
1994  work.resize(lwork);
1995 
1996  // now the actual run:
1997  ppocon(&uplo,
1998  &n_columns,
1999  A_loc,
2000  &submatrix_row,
2001  &submatrix_column,
2002  descriptor,
2003  &a_norm,
2004  &rcond,
2005  &work[0],
2006  &lwork,
2007  &iwork[0],
2008  &liwork,
2009  &info);
2010  AssertThrow(info == 0, LAPACKSupport::ExcErrorCode("pdpocon", info));
2011  }
2012  grid->send_to_inactive(&rcond);
2013  return rcond;
2014 }
2015 
2016 
2017 
2018 template <typename NumberType>
2019 NumberType
2020 ScaLAPACKMatrix<NumberType>::l1_norm() const
2021 {
2022  const char type('O');
2023 
2024  if (property == LAPACKSupport::symmetric)
2025  return norm_symmetric(type);
2026  else
2027  return norm_general(type);
2028 }
2029 
2030 
2031 
2032 template <typename NumberType>
2033 NumberType
2034 ScaLAPACKMatrix<NumberType>::linfty_norm() const
2035 {
2036  const char type('I');
2037 
2038  if (property == LAPACKSupport::symmetric)
2039  return norm_symmetric(type);
2040  else
2041  return norm_general(type);
2042 }
2043 
2044 
2045 
2046 template <typename NumberType>
2047 NumberType
2048 ScaLAPACKMatrix<NumberType>::frobenius_norm() const
2049 {
2050  const char type('F');
2051 
2052  if (property == LAPACKSupport::symmetric)
2053  return norm_symmetric(type);
2054  else
2055  return norm_general(type);
2056 }
2057 
2058 
2059 
2060 template <typename NumberType>
2061 NumberType
2062 ScaLAPACKMatrix<NumberType>::norm_general(const char type) const
2063 {
2064  Assert(state == LAPACKSupport::matrix ||
2065  state == LAPACKSupport::inverse_matrix,
2066  ExcMessage("norms can be called in matrix state only."));
2068  NumberType res = 0.;
2069 
2070  if (grid->mpi_process_is_active)
2071  {
2072  const int iarow = indxg2p_(&submatrix_row,
2073  &row_block_size,
2074  &(grid->this_process_row),
2075  &first_process_row,
2076  &(grid->n_process_rows));
2077  const int iacol = indxg2p_(&submatrix_column,
2078  &column_block_size,
2079  &(grid->this_process_column),
2080  &first_process_column,
2081  &(grid->n_process_columns));
2082  const int mp0 = numroc_(&n_rows,
2083  &row_block_size,
2084  &(grid->this_process_row),
2085  &iarow,
2086  &(grid->n_process_rows));
2087  const int nq0 = numroc_(&n_columns,
2088  &column_block_size,
2089  &(grid->this_process_column),
2090  &iacol,
2091  &(grid->n_process_columns));
2092 
2093  // type='M': compute largest absolute value
2094  // type='F' || type='E': compute Frobenius norm
2095  // type='0' || type='1': compute infinity norm
2096  int lwork = 0; // for type == 'M' || type == 'F' || type == 'E'
2097  if (type == 'O' || type == '1')
2098  lwork = nq0;
2099  else if (type == 'I')
2100  lwork = mp0;
2101 
2102  work.resize(lwork);
2103  const NumberType *A_loc = this->values.begin();
2104  res = plange(&type,
2105  &n_rows,
2106  &n_columns,
2107  A_loc,
2108  &submatrix_row,
2109  &submatrix_column,
2110  descriptor,
2111  work.data());
2112  }
2113  grid->send_to_inactive(&res);
2114  return res;
2115 }
2116 
2117 
2118 
2119 template <typename NumberType>
2120 NumberType
2121 ScaLAPACKMatrix<NumberType>::norm_symmetric(const char type) const
2122 {
2123  Assert(state == LAPACKSupport::matrix ||
2124  state == LAPACKSupport::inverse_matrix,
2125  ExcMessage("norms can be called in matrix state only."));
2126  Assert(property == LAPACKSupport::symmetric,
2127  ExcMessage("Matrix has to be symmetric for this operation."));
2129  NumberType res = 0.;
2130 
2131  if (grid->mpi_process_is_active)
2132  {
2133  // int IROFFA = MOD( IA-1, MB_A )
2134  // int ICOFFA = MOD( JA-1, NB_A )
2135  const int lcm =
2136  ilcm_(&(grid->n_process_rows), &(grid->n_process_columns));
2137  const int v2 = lcm / (grid->n_process_rows);
2138 
2139  const int IAROW = indxg2p_(&submatrix_row,
2140  &row_block_size,
2141  &(grid->this_process_row),
2142  &first_process_row,
2143  &(grid->n_process_rows));
2144  const int IACOL = indxg2p_(&submatrix_column,
2145  &column_block_size,
2146  &(grid->this_process_column),
2147  &first_process_column,
2148  &(grid->n_process_columns));
2149  const int Np0 = numroc_(&n_columns /*+IROFFA*/,
2150  &row_block_size,
2151  &(grid->this_process_row),
2152  &IAROW,
2153  &(grid->n_process_rows));
2154  const int Nq0 = numroc_(&n_columns /*+ICOFFA*/,
2155  &column_block_size,
2156  &(grid->this_process_column),
2157  &IACOL,
2158  &(grid->n_process_columns));
2159 
2160  const int v1 = iceil_(&Np0, &row_block_size);
2161  const int ldw = (n_local_rows == n_local_columns) ?
2162  0 :
2163  row_block_size * iceil_(&v1, &v2);
2164 
2165  const int lwork =
2166  (type == 'M' || type == 'F' || type == 'E') ? 0 : 2 * Nq0 + Np0 + ldw;
2167  work.resize(lwork);
2168  const NumberType *A_loc = this->values.begin();
2169  res = plansy(&type,
2170  &uplo,
2171  &n_columns,
2172  A_loc,
2173  &submatrix_row,
2174  &submatrix_column,
2175  descriptor,
2176  work.data());
2177  }
2178  grid->send_to_inactive(&res);
2179  return res;
2180 }
2181 
2182 
2183 
2184 # ifdef DEAL_II_WITH_HDF5
2185 namespace internal
2186 {
2187  namespace
2188  {
2189  void
2190  create_HDF5_state_enum_id(hid_t &state_enum_id)
2191  {
2192  // create HDF5 enum type for LAPACKSupport::State
2193  LAPACKSupport::State val;
2194  state_enum_id = H5Tcreate(H5T_ENUM, sizeof(LAPACKSupport::State));
2195  val = LAPACKSupport::State::cholesky;
2196  herr_t status = H5Tenum_insert(state_enum_id, "cholesky", (int *)&val);
2197  AssertThrow(status >= 0, ExcInternalError());
2198  val = LAPACKSupport::State::eigenvalues;
2199  status = H5Tenum_insert(state_enum_id, "eigenvalues", (int *)&val);
2200  AssertThrow(status >= 0, ExcInternalError());
2201  val = LAPACKSupport::State::inverse_matrix;
2202  status = H5Tenum_insert(state_enum_id, "inverse_matrix", (int *)&val);
2203  AssertThrow(status >= 0, ExcInternalError());
2204  val = LAPACKSupport::State::inverse_svd;
2205  status = H5Tenum_insert(state_enum_id, "inverse_svd", (int *)&val);
2206  AssertThrow(status >= 0, ExcInternalError());
2207  val = LAPACKSupport::State::lu;
2208  status = H5Tenum_insert(state_enum_id, "lu", (int *)&val);
2209  AssertThrow(status >= 0, ExcInternalError());
2210  val = LAPACKSupport::State::matrix;
2211  status = H5Tenum_insert(state_enum_id, "matrix", (int *)&val);
2212  AssertThrow(status >= 0, ExcInternalError());
2213  val = LAPACKSupport::State::svd;
2214  status = H5Tenum_insert(state_enum_id, "svd", (int *)&val);
2215  AssertThrow(status >= 0, ExcInternalError());
2216  val = LAPACKSupport::State::unusable;
2217  status = H5Tenum_insert(state_enum_id, "unusable", (int *)&val);
2218  AssertThrow(status >= 0, ExcInternalError());
2219  }
2220 
2221  void
2222  create_HDF5_property_enum_id(hid_t &property_enum_id)
2223  {
2224  // create HDF5 enum type for LAPACKSupport::Property
2225  property_enum_id = H5Tcreate(H5T_ENUM, sizeof(LAPACKSupport::Property));
2226  LAPACKSupport::Property prop = LAPACKSupport::Property::diagonal;
2227  herr_t status =
2228  H5Tenum_insert(property_enum_id, "diagonal", (int *)&prop);
2229  AssertThrow(status >= 0, ExcInternalError());
2230  prop = LAPACKSupport::Property::general;
2231  status = H5Tenum_insert(property_enum_id, "general", (int *)&prop);
2232  AssertThrow(status >= 0, ExcInternalError());
2233  prop = LAPACKSupport::Property::hessenberg;
2234  status = H5Tenum_insert(property_enum_id, "hessenberg", (int *)&prop);
2235  AssertThrow(status >= 0, ExcInternalError());
2236  prop = LAPACKSupport::Property::lower_triangular;
2237  status =
2238  H5Tenum_insert(property_enum_id, "lower_triangular", (int *)&prop);
2239  AssertThrow(status >= 0, ExcInternalError());
2240  prop = LAPACKSupport::Property::symmetric;
2241  status = H5Tenum_insert(property_enum_id, "symmetric", (int *)&prop);
2242  AssertThrow(status >= 0, ExcInternalError());
2243  prop = LAPACKSupport::Property::upper_triangular;
2244  status =
2245  H5Tenum_insert(property_enum_id, "upper_triangular", (int *)&prop);
2246  AssertThrow(status >= 0, ExcInternalError());
2247  }
2248  } // namespace
2249 } // namespace internal
2250 # endif
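// Both helpers above build in-memory HDF5 enumeration types whose members
// mirror LAPACKSupport::State and LAPACKSupport::Property. The save() and
// load() members below use them to store the matrix state and property as
// one-element datasets ("/state" and "/property") next to the matrix values.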
2251 
2252 
2253 
2254 template <typename NumberType>
2255 void
2256 ScaLAPACKMatrix<NumberType>::save(
2257  const char * filename,
2258  const std::pair<unsigned int, unsigned int> &chunk_size) const
2259 {
2260 # ifndef DEAL_II_WITH_HDF5
2261  (void)filename;
2262  (void)chunk_size;
2263  AssertThrow(false, ExcMessage("HDF5 support is disabled."));
2264 # else
2265 
2266  std::pair<unsigned int, unsigned int> chunks_size_ = chunk_size;
2267 
2268  if (chunks_size_.first == numbers::invalid_unsigned_int ||
2269  chunks_size_.second == numbers::invalid_unsigned_int)
2270  {
2271  // default: store the matrix in chunks of columns
2272  chunks_size_.first = n_rows;
2273  chunks_size_.second = 1;
2274  }
2275  Assert((chunks_size_.first <= (unsigned int)n_rows) &&
2276  (chunks_size_.first > 0),
2277  ExcIndexRange(chunks_size_.first, 1, n_rows + 1));
2278  Assert((chunks_size_.second <= (unsigned int)n_columns) &&
2279  (chunks_size_.second > 0),
2280  ExcIndexRange(chunks_size_.second, 1, n_columns + 1));
2281 
2282 # ifdef H5_HAVE_PARALLEL
2283  // implementation for configurations equipped with a parallel file system
2284  save_parallel(filename, chunks_size_);
2285 
2286 # else
2287  // implementation for configurations with no parallel file system
2288  save_serial(filename, chunks_size_);
2289 
2290 # endif
2291 # endif
2292 }
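// Usage sketch (illustration only; "A", "B", the grid and the file name are
// hypothetical): a matrix can be written to disk and read back into another
// matrix of the same global size, restoring values, state and property.
//
//   A.save("matrix.h5");                           // default: column-wise chunks
//   A.save("matrix.h5", std::make_pair(64u, 64u)); // 64x64 chunks instead
//   ScaLAPACKMatrix<double> B(A.m(), A.n(), grid, 32, 32);
//   B.load("matrix.h5");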
2293 
2294 
2295 
2296 template <typename NumberType>
2297 void
2298 ScaLAPACKMatrix<NumberType>::save_serial(
2299  const char * filename,
2300  const std::pair<unsigned int, unsigned int> &chunk_size) const
2301 {
2302 # ifndef DEAL_II_WITH_HDF5
2303  (void)filename;
2304  (void)chunk_size;
2305  Assert(false, ExcInternalError());
2306 # else
2307 
2308  /*
2309  * The content of the distributed matrix is copied to a matrix using a 1x1
2310  * process grid. Therefore, one process has all the data and can write it to a
2311  * file.
2312  *
2313  * Create a 1x1 column grid which will be used to initialize
2314  * an effectively serial ScaLAPACK matrix to gather the contents from the
2315  * current object
2316  */
2317  const auto column_grid =
2318  std::make_shared<Utilities::MPI::ProcessGrid>(this->grid->mpi_communicator,
2319  1,
2320  1);
2321 
2322  const int MB = n_rows, NB = n_columns;
2323  ScaLAPACKMatrix<NumberType> tmp(n_rows, n_columns, column_grid, MB, NB);
2324  copy_to(tmp);
2325 
2326  // the 1x1 grid has only one process and this one writes
2327  // the content of the matrix to the HDF5 file
2328  if (tmp.grid->mpi_process_is_active)
2329  {
2330  herr_t status;
2331 
2332  // create a new file using default properties
2333  hid_t file_id =
2334  H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
2335 
2336  // modify dataset creation properties, i.e. enable chunking
2337  hsize_t chunk_dims[2];
2338  // reverse the order of rows and columns as ScaLAPACK uses column-major
2339  // ordering
2340  chunk_dims[0] = chunk_size.second;
2341  chunk_dims[1] = chunk_size.first;
2342  hid_t data_property = H5Pcreate(H5P_DATASET_CREATE);
2343  status = H5Pset_chunk(data_property, 2, chunk_dims);
2344  AssertThrow(status >= 0, ExcIO());
2345 
2346  // create the data space for the dataset
2347  hsize_t dims[2];
2348  // change order of rows and columns as ScaLAPACKMatrix uses column major
2349  // ordering
2350  dims[0] = n_columns;
2351  dims[1] = n_rows;
2352  hid_t dataspace_id = H5Screate_simple(2, dims, nullptr);
2353 
2354  // create the dataset within the file using chunk creation properties
2355  hid_t type_id = hdf5_type_id(&tmp.values[0]);
2356  hid_t dataset_id = H5Dcreate2(file_id,
2357  "/matrix",
2358  type_id,
2359  dataspace_id,
2360  H5P_DEFAULT,
2361  data_property,
2362  H5P_DEFAULT);
2363 
2364  // write the dataset
2365  status = H5Dwrite(
2366  dataset_id, type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, &tmp.values[0]);
2367  AssertThrow(status >= 0, ExcIO());
2368 
2369  // create HDF5 enum type for LAPACKSupport::State and
2370  // LAPACKSupport::Property
2371  hid_t state_enum_id, property_enum_id;
2372  internal::create_HDF5_state_enum_id(state_enum_id);
2373  internal::create_HDF5_property_enum_id(property_enum_id);
2374 
2375  // create the data space for the state enum
2376  hsize_t dims_state[1];
2377  dims_state[0] = 1;
2378  hid_t state_enum_dataspace = H5Screate_simple(1, dims_state, nullptr);
2379  // create the dataset for the state enum
2380  hid_t state_enum_dataset = H5Dcreate2(file_id,
2381  "/state",
2382  state_enum_id,
2383  state_enum_dataspace,
2384  H5P_DEFAULT,
2385  H5P_DEFAULT,
2386  H5P_DEFAULT);
2387  // write the dataset for the state enum
2388  status = H5Dwrite(state_enum_dataset,
2389  state_enum_id,
2390  H5S_ALL,
2391  H5S_ALL,
2392  H5P_DEFAULT,
2393  &state);
2394  AssertThrow(status >= 0, ExcIO());
2395 
2396  // create the data space for the property enum
2397  hsize_t dims_property[1];
2398  dims_property[0] = 1;
2399  hid_t property_enum_dataspace =
2400  H5Screate_simple(1, dims_property, nullptr);
2401  // create the dataset for the property enum
2402  hid_t property_enum_dataset = H5Dcreate2(file_id,
2403  "/property",
2404  property_enum_id,
2405  property_enum_dataspace,
2406  H5P_DEFAULT,
2407  H5P_DEFAULT,
2408  H5P_DEFAULT);
2409  // write the dataset for the property enum
2410  status = H5Dwrite(property_enum_dataset,
2411  property_enum_id,
2412  H5S_ALL,
2413  H5S_ALL,
2414  H5P_DEFAULT,
2415  &property);
2416  AssertThrow(status >= 0, ExcIO());
2417 
2418  // end access to the datasets and release resources used by them
2419  status = H5Dclose(dataset_id);
2420  AssertThrow(status >= 0, ExcIO());
2421  status = H5Dclose(state_enum_dataset);
2422  AssertThrow(status >= 0, ExcIO());
2423  status = H5Dclose(property_enum_dataset);
2424  AssertThrow(status >= 0, ExcIO());
2425 
2426  // terminate access to the data spaces
2427  status = H5Sclose(dataspace_id);
2428  AssertThrow(status >= 0, ExcIO());
2429  status = H5Sclose(state_enum_dataspace);
2430  AssertThrow(status >= 0, ExcIO());
2431  status = H5Sclose(property_enum_dataspace);
2432  AssertThrow(status >= 0, ExcIO());
2433 
2434  // release enum data types
2435  status = H5Tclose(state_enum_id);
2436  AssertThrow(status >= 0, ExcIO());
2437  status = H5Tclose(property_enum_id);
2438  AssertThrow(status >= 0, ExcIO());
2439 
2440  // release the creation property
2441  status = H5Pclose(data_property);
2442  AssertThrow(status >= 0, ExcIO());
2443 
2444  // close the file.
2445  status = H5Fclose(file_id);
2446  AssertThrow(status >= 0, ExcIO());
2447  }
2448 # endif
2449 }
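// The file written above contains three datasets: "/matrix" (the values, with
// the row and column dimensions swapped because of the column-major storage),
// and the one-element enum datasets "/state" and "/property". Assuming the
// HDF5 command-line tools are available, something like "h5dump -H matrix.h5"
// (file name hypothetical) shows this layout.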
2450 
2451 
2452 
2453 template <typename NumberType>
2454 void
2455 ScaLAPACKMatrix<NumberType>::save_parallel(
2456  const char * filename,
2457  const std::pair<unsigned int, unsigned int> &chunk_size) const
2458 {
2459 # ifndef DEAL_II_WITH_HDF5
2460  (void)filename;
2461  (void)chunk_size;
2462  Assert(false, ExcInternalError());
2463 # else
2464 
2465  const unsigned int n_mpi_processes(
2466  Utilities::MPI::n_mpi_processes(this->grid->mpi_communicator));
2467  MPI_Info info = MPI_INFO_NULL;
2468  /*
2469  * The content of the distributed matrix is copied to a matrix using a
2470  * 1xn_processes process grid. Therefore, the processes hold contiguous chunks
2471  * of the matrix, which they can write to the file
2472  *
2473  * Create a 1xn_processes column grid
2474  */
2475  const auto column_grid =
2476  std::make_shared<Utilities::MPI::ProcessGrid>(this->grid->mpi_communicator,
2477  1,
2478  n_mpi_processes);
2479 
2480  const int MB = n_rows, NB = std::ceil(static_cast<double>(n_columns) / n_mpi_processes);
2481  ScaLAPACKMatrix<NumberType> tmp(n_rows, n_columns, column_grid, MB, NB);
2482  copy_to(tmp);
2483 
2484  // get pointer to data held by the process
2485  NumberType *data = (tmp.values.size() > 0) ? &tmp.values[0] : nullptr;
2486 
2487  herr_t status;
2488  // dataset dimensions
2489  hsize_t dims[2];
2490 
2491  // set up file access property list with parallel I/O access
2492  hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);
2493  status = H5Pset_fapl_mpio(plist_id, tmp.grid->mpi_communicator, info);
2494  AssertThrow(status >= 0, ExcIO());
2495 
2496  // create a new file collectively and release property list identifier
2497  hid_t file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, plist_id);
2498  status = H5Pclose(plist_id);
2499  AssertThrow(status >= 0, ExcIO());
2500 
2501  // As ScaLAPACK, and therefore the class ScaLAPACKMatrix, uses column-major
2502  // ordering, whereas HDF5 uses row-major ordering, we have to swap the roles
2503  // of rows and columns in the following. Create the dataspace for the dataset.
2504  dims[0] = tmp.n_columns;
2505  dims[1] = tmp.n_rows;
2506 
2507  hid_t filespace = H5Screate_simple(2, dims, nullptr);
2508 
2509  // create the chunked dataset with default properties and close filespace
2510  hsize_t chunk_dims[2];
2511  // reverse the order of rows and columns as ScaLAPACK uses column-major ordering
2512  chunk_dims[0] = chunk_size.second;
2513  chunk_dims[1] = chunk_size.first;
2514  plist_id = H5Pcreate(H5P_DATASET_CREATE);
2515  H5Pset_chunk(plist_id, 2, chunk_dims);
2516  hid_t type_id = hdf5_type_id(data);
2517  hid_t dset_id = H5Dcreate2(
2518  file_id, "/matrix", type_id, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
2519 
2520  status = H5Sclose(filespace);
2521  AssertThrow(status >= 0, ExcIO());
2522 
2523  status = H5Pclose(plist_id);
2524  AssertThrow(status >= 0, ExcIO());
2525 
2526  // gather the number of local rows and columns from all processes
2527  std::vector<int> proc_n_local_rows(n_mpi_processes),
2528  proc_n_local_columns(n_mpi_processes);
2529  int ierr = MPI_Allgather(&tmp.n_local_rows,
2530  1,
2531  MPI_INT,
2532  proc_n_local_rows.data(),
2533  1,
2534  MPI_INT,
2535  tmp.grid->mpi_communicator);
2536  AssertThrowMPI(ierr);
2537  ierr = MPI_Allgather(&tmp.n_local_columns,
2538  1,
2539  MPI_INT,
2540  proc_n_local_columns.data(),
2541  1,
2542  MPI_INT,
2543  tmp.grid->mpi_communicator);
2544  AssertThrowMPI(ierr);
2545 
2546  const unsigned int my_rank(
2547  Utilities::MPI::this_mpi_process(tmp.grid->mpi_communicator));
2548 
2549  // hyperslab selection parameters
2550  // each process defines dataset in memory and writes it to the hyperslab in
2551  // the file
2552  hsize_t count[2];
2553  count[0] = tmp.n_local_columns;
2554  count[1] = tmp.n_rows;
2555  hid_t memspace = H5Screate_simple(2, count, nullptr);
2556 
2557  hsize_t offset[2] = {0};
2558  for (unsigned int i = 0; i < my_rank; ++i)
2559  offset[0] += proc_n_local_columns[i];
2560 
2561  // select hyperslab in the file.
2562  filespace = H5Dget_space(dset_id);
2563  status = H5Sselect_hyperslab(
2564  filespace, H5S_SELECT_SET, offset, nullptr, count, nullptr);
2565  AssertThrow(status >= 0, ExcIO());
2566 
2567  // create property list for independent dataset write
2568  plist_id = H5Pcreate(H5P_DATASET_XFER);
2569  status = H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_INDEPENDENT);
2570  AssertThrow(status >= 0, ExcIO());
2571 
2572  // process with no data will not participate in writing to the file
2573  if (tmp.values.size() > 0)
2574  {
2575  status = H5Dwrite(dset_id, type_id, memspace, filespace, plist_id, data);
2576  AssertThrow(status >= 0, ExcIO());
2577  }
2578  // close/release sources
2579  status = H5Dclose(dset_id);
2580  AssertThrow(status >= 0, ExcIO());
2581  status = H5Sclose(filespace);
2582  AssertThrow(status >= 0, ExcIO());
2583  status = H5Sclose(memspace);
2584  AssertThrow(status >= 0, ExcIO());
2585  status = H5Pclose(plist_id);
2586  AssertThrow(status >= 0, ExcIO());
2587  status = H5Fclose(file_id);
2588  AssertThrow(status >= 0, ExcIO());
2589 
2590  // before writing the state and property to file wait for
2591  // all processes to finish writing the matrix content to the file
2592  ierr = MPI_Barrier(tmp.grid->mpi_communicator);
2593  AssertThrowMPI(ierr);
2594 
2595  // only root process will write state and property to the file
2596  if (tmp.grid->this_mpi_process == 0)
2597  {
2598  // open file using default properties
2599  hid_t file_id_reopen = H5Fopen(filename, H5F_ACC_RDWR, H5P_DEFAULT);
2600 
2601  // create HDF5 enum type for LAPACKSupport::State and
2602  // LAPACKSupport::Property
2603  hid_t state_enum_id, property_enum_id;
2604  internal::create_HDF5_state_enum_id(state_enum_id);
2605  internal::create_HDF5_property_enum_id(property_enum_id);
2606 
2607  // create the data space for the state enum
2608  hsize_t dims_state[1];
2609  dims_state[0] = 1;
2610  hid_t state_enum_dataspace = H5Screate_simple(1, dims_state, nullptr);
2611  // create the dataset for the state enum
2612  hid_t state_enum_dataset = H5Dcreate2(file_id_reopen,
2613  "/state",
2614  state_enum_id,
2615  state_enum_dataspace,
2616  H5P_DEFAULT,
2617  H5P_DEFAULT,
2618  H5P_DEFAULT);
2619  // write the dataset for the state enum
2620  status = H5Dwrite(state_enum_dataset,
2621  state_enum_id,
2622  H5S_ALL,
2623  H5S_ALL,
2624  H5P_DEFAULT,
2625  &state);
2626  AssertThrow(status >= 0, ExcIO());
2627 
2628  // create the data space for the property enum
2629  hsize_t dims_property[1];
2630  dims_property[0] = 1;
2631  hid_t property_enum_dataspace =
2632  H5Screate_simple(1, dims_property, nullptr);
2633  // create the dataset for the property enum
2634  hid_t property_enum_dataset = H5Dcreate2(file_id_reopen,
2635  "/property",
2636  property_enum_id,
2637  property_enum_dataspace,
2638  H5P_DEFAULT,
2639  H5P_DEFAULT,
2640  H5P_DEFAULT);
2641  // write the dataset for the property enum
2642  status = H5Dwrite(property_enum_dataset,
2643  property_enum_id,
2644  H5S_ALL,
2645  H5S_ALL,
2646  H5P_DEFAULT,
2647  &property);
2648  AssertThrow(status >= 0, ExcIO());
2649 
2650  status = H5Dclose(state_enum_dataset);
2651  AssertThrow(status >= 0, ExcIO());
2652  status = H5Dclose(property_enum_dataset);
2653  AssertThrow(status >= 0, ExcIO());
2654  status = H5Sclose(state_enum_dataspace);
2655  AssertThrow(status >= 0, ExcIO());
2656  status = H5Sclose(property_enum_dataspace);
2657  AssertThrow(status >= 0, ExcIO());
2658  status = H5Tclose(state_enum_id);
2659  AssertThrow(status >= 0, ExcIO());
2660  status = H5Tclose(property_enum_id);
2661  AssertThrow(status >= 0, ExcIO());
2662  status = H5Fclose(file_id_reopen);
2663  AssertThrow(status >= 0, ExcIO());
2664  }
2665 
2666 # endif
2667 }
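// Worked example for the hyperslab selection above (illustration only): with
// n_columns = 10 and 3 MPI processes, NB = ceil(10/3) = 4, so the 1x3 column
// grid stores 4, 4 and 2 columns on ranks 0, 1 and 2, respectively. The write
// offsets along the first (column) dimension of the file dataspace are then
// 0, 4 and 8, i.e. the prefix sums of proc_n_local_columns.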
2668 
2669 
2670 
2671 template <typename NumberType>
2672 void
2673 ScaLAPACKMatrix<NumberType>::load(const char *filename)
2674 {
2675 # ifndef DEAL_II_WITH_HDF5
2676  (void)filename;
2677  AssertThrow(false, ExcMessage("HDF5 support is disabled."));
2678 # else
2679 # ifdef H5_HAVE_PARALLEL
2680  // implementation for configurations equipped with a parallel file system
2681  load_parallel(filename);
2682 
2683 # else
2684  // implementation for configurations with no parallel file system
2685  load_serial(filename);
2686 # endif
2687 # endif
2688 }
2689 
2690 
2691 
2692 template <typename NumberType>
2693 void
2694 ScaLAPACKMatrix<NumberType>::load_serial(const char *filename)
2695 {
2696 # ifndef DEAL_II_WITH_HDF5
2697  (void)filename;
2698  Assert(false, ExcInternalError());
2699 # else
2700 
2701  /*
2702  * The content of the file is read into a matrix using a 1x1 process grid,
2703  * i.e. one process reads all the data and distributes it to the other
2704  * processes afterwards.
2705  */
2706  // create a 1x1 process grid
2707  const auto one_grid =
2708  std::make_shared<Utilities::MPI::ProcessGrid>(this->grid->mpi_communicator,
2709  1,
2710  1);
2711 
2712  const int MB = n_rows, NB = n_columns;
2713  ScaLAPACKMatrix<NumberType> tmp(n_rows, n_columns, one_grid, MB, NB);
2714 
2715  int state_int = -1;
2716  int property_int = -1;
2717 
2718  // the 1x1 grid has only one process and this one reads
2719  // the content of the matrix from the HDF5 file
2720  if (tmp.grid->mpi_process_is_active)
2721  {
2722  herr_t status;
2723 
2724  // open the file in read-only mode
2725  hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
2726 
2727  // open the dataset in the file
2728  hid_t dataset_id = H5Dopen2(file_id, "/matrix", H5P_DEFAULT);
2729 
2730  // check the datatype of the data in the file
2731  // datatype of source and destination must have the same class
2732  // see HDF User's Guide: 6.10. Data Transfer: Datatype Conversion and
2733  // Selection
2734  hid_t datatype = H5Dget_type(dataset_id);
2735  H5T_class_t t_class_in = H5Tget_class(datatype);
2736  H5T_class_t t_class = H5Tget_class(hdf5_type_id(&tmp.values[0]));
2737  AssertThrow(
2738  t_class_in == t_class,
2739  ExcMessage(
2740  "The data type of the matrix to be read does not match the archive"));
2741 
2742  // get dataspace handle
2743  hid_t dataspace_id = H5Dget_space(dataset_id);
2744  // get number of dimensions
2745  const int ndims = H5Sget_simple_extent_ndims(dataspace_id);
2746  AssertThrow(ndims == 2, ExcIO());
2747  // get every dimension
2748  hsize_t dims[2];
2749  H5Sget_simple_extent_dims(dataspace_id, dims, nullptr);
2750  AssertThrow(
2751  (int)dims[0] == n_columns,
2752  ExcMessage(
2753  "The number of columns of the matrix does not match the content of the archive"));
2754  AssertThrow(
2755  (int)dims[1] == n_rows,
2756  ExcMessage(
2757  "The number of rows of the matrix does not match the content of the archive"));
2758 
2759  // read data
2760  status = H5Dread(dataset_id,
2761  hdf5_type_id(&tmp.values[0]),
2762  H5S_ALL,
2763  H5S_ALL,
2764  H5P_DEFAULT,
2765  &tmp.values[0]);
2766  AssertThrow(status >= 0, ExcIO());
2767 
2768  // create HDF5 enum type for LAPACKSupport::State and
2769  // LAPACKSupport::Property
2770  hid_t state_enum_id, property_enum_id;
2771  internal::create_HDF5_state_enum_id(state_enum_id);
2772  internal::create_HDF5_property_enum_id(property_enum_id);
2773 
2774  // open the datasets for the state and property enum in the file
2775  hid_t dataset_state_id = H5Dopen2(file_id, "/state", H5P_DEFAULT);
2776  hid_t datatype_state = H5Dget_type(dataset_state_id);
2777  H5T_class_t t_class_state = H5Tget_class(datatype_state);
2778  AssertThrow(t_class_state == H5T_ENUM, ExcIO());
2779 
2780  hid_t dataset_property_id = H5Dopen2(file_id, "/property", H5P_DEFAULT);
2781  hid_t datatype_property = H5Dget_type(dataset_property_id);
2782  H5T_class_t t_class_property = H5Tget_class(datatype_property);
2783  AssertThrow(t_class_property == H5T_ENUM, ExcIO());
2784 
2785  // get dataspace handles
2786  hid_t dataspace_state = H5Dget_space(dataset_state_id);
2787  hid_t dataspace_property = H5Dget_space(dataset_property_id);
2788  // get number of dimensions
2789  const int ndims_state = H5Sget_simple_extent_ndims(dataspace_state);
2790  AssertThrow(ndims_state == 1, ExcIO());
2791  const int ndims_property = H5Sget_simple_extent_ndims(dataspace_property);
2792  AssertThrow(ndims_property == 1, ExcIO());
2793  // get every dimension
2794  hsize_t dims_state[1];
2795  H5Sget_simple_extent_dims(dataspace_state, dims_state, nullptr);
2796  AssertThrow((int)dims_state[0] == 1, ExcIO());
2797  hsize_t dims_property[1];
2798  H5Sget_simple_extent_dims(dataspace_property, dims_property, nullptr);
2799  AssertThrow((int)dims_property[0] == 1, ExcIO());
2800 
2801  // read data
2802  status = H5Dread(dataset_state_id,
2803  state_enum_id,
2804  H5S_ALL,
2805  H5S_ALL,
2806  H5P_DEFAULT,
2807  &tmp.state);
2808  AssertThrow(status >= 0, ExcIO());
2809  // To send the state from the root process to the other processes
2810  // the state enum is cast to an integer, which is broadcast and
2811  // subsequently cast back to the enum type
2812  state_int = static_cast<int>(tmp.state);
2813 
2814  status = H5Dread(dataset_property_id,
2815  property_enum_id,
2816  H5S_ALL,
2817  H5S_ALL,
2818  H5P_DEFAULT,
2819  &tmp.property);
2820  AssertThrow(status >= 0, ExcIO());
2821  // To send the property from the root process to the other processes
2822  // the property enum is cast to an integer, which is broadcast and
2823  // subsequently cast back to the enum type
2824  property_int = static_cast<int>(tmp.property);
2825 
2826  // terminate access to the data spaces
2827  status = H5Sclose(dataspace_id);
2828  AssertThrow(status >= 0, ExcIO());
2829  status = H5Sclose(dataspace_state);
2830  AssertThrow(status >= 0, ExcIO());
2831  status = H5Sclose(dataspace_property);
2832  AssertThrow(status >= 0, ExcIO());
2833 
2834  // release data type handles
2835  status = H5Tclose(datatype);
2836  AssertThrow(status >= 0, ExcIO());
2837  status = H5Tclose(state_enum_id);
2838  AssertThrow(status >= 0, ExcIO());
2839  status = H5Tclose(property_enum_id);
2840  AssertThrow(status >= 0, ExcIO());
2841 
2842  // end access to the data sets and release resources used by them
2843  status = H5Dclose(dataset_state_id);
2844  AssertThrow(status >= 0, ExcIO());
2845  status = H5Dclose(dataset_id);
2846  AssertThrow(status >= 0, ExcIO());
2847  status = H5Dclose(dataset_property_id);
2848  AssertThrow(status >= 0, ExcIO());
2849 
2850  // close the file.
2851  status = H5Fclose(file_id);
2852  AssertThrow(status >= 0, ExcIO());
2853  }
2854  // so far only the root process has the correct state integer --> broadcasting
2855  tmp.grid->send_to_inactive(&state_int, 1);
2856  // so far only the root process has the correct property integer -->
2857  // broadcasting
2858  tmp.grid->send_to_inactive(&property_int, 1);
2859 
2860  tmp.state = static_cast<LAPACKSupport::State>(state_int);
2861  tmp.property = static_cast<LAPACKSupport::Property>(property_int);
2862 
2863  tmp.copy_to(*this);
2864 
2865 # endif // DEAL_II_WITH_HDF5
2866 }
2867 
2868 
2869 
2870 template <typename NumberType>
2871 void
2872 ScaLAPACKMatrix<NumberType>::load_parallel(const char *filename)
2873 {
2874 # ifndef DEAL_II_WITH_HDF5
2875  (void)filename;
2876  Assert(false, ExcInternalError());
2877 # else
2878 # ifndef H5_HAVE_PARALLEL
2879  Assert(false, ExcInternalError());
2880 # else
2881 
2882  const unsigned int n_mpi_processes(
2883  Utilities::MPI::n_mpi_processes(this->grid->mpi_communicator));
2884  MPI_Info info = MPI_INFO_NULL;
2885  /*
2886  * The content of the file is read into a matrix using a 1xn_processes
2887  * process grid. Therefore, the processes hold contiguous chunks of the
2888  * matrix, which they can read from the file independently.
2889  */
2890  // create a 1xP column grid with P being the number of MPI processes
2891  const auto column_grid =
2892  std::make_shared<Utilities::MPI::ProcessGrid>(this->grid->mpi_communicator,
2893  1,
2894  n_mpi_processes);
2895 
2896  const int MB = n_rows, NB = std::ceil(static_cast<double>(n_columns) / n_mpi_processes);
2897  ScaLAPACKMatrix<NumberType> tmp(n_rows, n_columns, column_grid, MB, NB);
2898 
2899  // get pointer to data held by the process
2900  NumberType *data = (tmp.values.size() > 0) ? &tmp.values[0] : nullptr;
2901 
2902  herr_t status;
2903 
2904  // set up file access property list with parallel I/O access
2905  hid_t plist_id = H5Pcreate(H5P_FILE_ACCESS);
2906  status = H5Pset_fapl_mpio(plist_id, tmp.grid->mpi_communicator, info);
2907  AssertThrow(status >= 0, ExcIO());
2908 
2909  // open file collectively in read-only mode and release property list
2910  // identifier
2911  hid_t file_id = H5Fopen(filename, H5F_ACC_RDONLY, plist_id);
2912  status = H5Pclose(plist_id);
2913  AssertThrow(status >= 0, ExcIO());
2914 
2915  // open the dataset in the file collectively
2916  hid_t dataset_id = H5Dopen2(file_id, "/matrix", H5P_DEFAULT);
2917 
2918  // check the datatype of the dataset in the file
2919  // if the type classes of the dataset and the matrix do not match, abort
2920  // see HDF User's Guide: 6.10. Data Transfer: Datatype Conversion and
2921  // Selection
2922  hid_t datatype = hdf5_type_id(data);
2923  hid_t datatype_inp = H5Dget_type(dataset_id);
2924  H5T_class_t t_class_inp = H5Tget_class(datatype_inp);
2925  H5T_class_t t_class = H5Tget_class(datatype);
2926  AssertThrow(
2927  t_class_inp == t_class,
2928  ExcMessage(
2929  "The data type of the matrix to be read does not match the archive"));
2930 
2931  // get the dimensions of the matrix stored in the file
2932  // get dataspace handle
2933  hid_t dataspace_id = H5Dget_space(dataset_id);
2934  // get number of dimensions
2935  const int ndims = H5Sget_simple_extent_ndims(dataspace_id);
2936  AssertThrow(ndims == 2, ExcIO());
2937  // get every dimension
2938  hsize_t dims[2];
2939  status = H5Sget_simple_extent_dims(dataspace_id, dims, nullptr);
2940  AssertThrow(status >= 0, ExcIO());
2941  AssertThrow(
2942  (int)dims[0] == n_columns,
2943  ExcMessage(
2944  "The number of columns of the matrix does not match the content of the archive"));
2945  AssertThrow(
2946  (int)dims[1] == n_rows,
2947  ExcMessage(
2948  "The number of rows of the matrix does not match the content of the archive"));
2949 
2950  // gather the number of local rows and columns from all processes
2951  std::vector<int> proc_n_local_rows(n_mpi_processes),
2952  proc_n_local_columns(n_mpi_processes);
2953  int ierr = MPI_Allgather(&tmp.n_local_rows,
2954  1,
2955  MPI_INT,
2956  proc_n_local_rows.data(),
2957  1,
2958  MPI_INT,
2959  tmp.grid->mpi_communicator);
2960  AssertThrowMPI(ierr);
2961  ierr = MPI_Allgather(&tmp.n_local_columns,
2962  1,
2963  MPI_INT,
2964  proc_n_local_columns.data(),
2965  1,
2966  MPI_INT,
2967  tmp.grid->mpi_communicator);
2968  AssertThrowMPI(ierr);
2969 
2970  const unsigned int my_rank(
2971  Utilities::MPI::this_mpi_process(tmp.grid->mpi_communicator));
2972 
2973  // hyperslab selection parameters
2974  // each process defines dataset in memory and writes it to the hyperslab in
2975  // the file
2976  hsize_t count[2];
2977  count[0] = tmp.n_local_columns;
2978  count[1] = tmp.n_local_rows;
2979 
2980  hsize_t offset[2] = {0};
2981  for (unsigned int i = 0; i < my_rank; ++i)
2982  offset[0] += proc_n_local_columns[i];
2983 
2984  // select hyperslab in the file
2985  status = H5Sselect_hyperslab(
2986  dataspace_id, H5S_SELECT_SET, offset, nullptr, count, nullptr);
2987  AssertThrow(status >= 0, ExcIO());
2988 
2989  // create a memory dataspace independently
2990  hid_t memspace = H5Screate_simple(2, count, nullptr);
2991 
2992  // read data independently
2993  status =
2994  H5Dread(dataset_id, datatype, memspace, dataspace_id, H5P_DEFAULT, data);
2995  AssertThrow(status >= 0, ExcIO());
2996 
2997  // create HDF5 enum type for LAPACKSupport::State and LAPACKSupport::Property
2998  hid_t state_enum_id, property_enum_id;
2999  internal::create_HDF5_state_enum_id(state_enum_id);
3000  internal::create_HDF5_property_enum_id(property_enum_id);
3001 
3002  // open the datasets for the state and property enum in the file
3003  hid_t dataset_state_id = H5Dopen2(file_id, "/state", H5P_DEFAULT);
3004  hid_t datatype_state = H5Dget_type(dataset_state_id);
3005  H5T_class_t t_class_state = H5Tget_class(datatype_state);
3006  AssertThrow(t_class_state == H5T_ENUM, ExcIO());
3007 
3008  hid_t dataset_property_id = H5Dopen2(file_id, "/property", H5P_DEFAULT);
3009  hid_t datatype_property = H5Dget_type(dataset_property_id);
3010  H5T_class_t t_class_property = H5Tget_class(datatype_property);
3011  AssertThrow(t_class_property == H5T_ENUM, ExcIO());
3012 
3013  // get dataspace handles
3014  hid_t dataspace_state = H5Dget_space(dataset_state_id);
3015  hid_t dataspace_property = H5Dget_space(dataset_property_id);
3016  // get number of dimensions
3017  const int ndims_state = H5Sget_simple_extent_ndims(dataspace_state);
3018  AssertThrow(ndims_state == 1, ExcIO());
3019  const int ndims_property = H5Sget_simple_extent_ndims(dataspace_property);
3020  AssertThrow(ndims_property == 1, ExcIO());
3021  // get every dimension
3022  hsize_t dims_state[1];
3023  H5Sget_simple_extent_dims(dataspace_state, dims_state, nullptr);
3024  AssertThrow((int)dims_state[0] == 1, ExcIO());
3025  hsize_t dims_property[1];
3026  H5Sget_simple_extent_dims(dataspace_property, dims_property, nullptr);
3027  AssertThrow((int)dims_property[0] == 1, ExcIO());
3028 
3029  // read data
3030  status = H5Dread(
3031  dataset_state_id, state_enum_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, &tmp.state);
3032  AssertThrow(status >= 0, ExcIO());
3033 
3034  status = H5Dread(dataset_property_id,
3035  property_enum_id,
3036  H5S_ALL,
3037  H5S_ALL,
3038  H5P_DEFAULT,
3039  &tmp.property);
3040  AssertThrow(status >= 0, ExcIO());
3041 
3042  // close/release sources
3043  status = H5Sclose(memspace);
3044  AssertThrow(status >= 0, ExcIO());
3045  status = H5Dclose(dataset_id);
3046  AssertThrow(status >= 0, ExcIO());
3047  status = H5Dclose(dataset_state_id);
3048  AssertThrow(status >= 0, ExcIO());
3049  status = H5Dclose(dataset_property_id);
3050  AssertThrow(status >= 0, ExcIO());
3051  status = H5Sclose(dataspace_id);
3052  AssertThrow(status >= 0, ExcIO());
3053  status = H5Sclose(dataspace_state);
3054  AssertThrow(status >= 0, ExcIO());
3055  status = H5Sclose(dataspace_property);
3056  AssertThrow(status >= 0, ExcIO());
3057  // status = H5Tclose(datatype);
3058  // AssertThrow(status >= 0, ExcIO());
3059  status = H5Tclose(state_enum_id);
3060  AssertThrow(status >= 0, ExcIO());
3061  status = H5Tclose(property_enum_id);
3062  AssertThrow(status >= 0, ExcIO());
3063  status = H5Fclose(file_id);
3064  AssertThrow(status >= 0, ExcIO());
3065 
3066  // copying the distributed matrices
3067  tmp.copy_to(*this);
3068 
3069 # endif // H5_HAVE_PARALLEL
3070 # endif // DEAL_II_WITH_HDF5
3071 }
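// Note that load_parallel() mirrors save_parallel(): each process selects the
// hyperslab corresponding to its contiguous block of columns and reads it
// independently, and, unlike in load_serial(), every process reads the
// "/state" and "/property" datasets itself, so no broadcast is needed
// afterwards.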
3072 
3073 
3074 
3075 namespace internal
3076 {
3077  namespace
3078  {
3079  template <typename NumberType>
3080  void
3081  scale_columns(ScaLAPACKMatrix<NumberType> &matrix,
3082  const ArrayView<const NumberType> &factors)
3083  {
3084  Assert(matrix.n() == factors.size(),
3085  ExcDimensionMismatch(matrix.n(), factors.size()));
3086 
3087  for (unsigned int i = 0; i < matrix.local_n(); ++i)
3088  {
3089  const NumberType s = factors[matrix.global_column(i)];
3090 
3091  for (unsigned int j = 0; j < matrix.local_m(); ++j)
3092  matrix.local_el(j, i) *= s;
3093  }
3094  }
3095 
3096  template <typename NumberType>
3097  void
3098  scale_rows(ScaLAPACKMatrix<NumberType> &matrix,
3099  const ArrayView<const NumberType> &factors)
3100  {
3101  Assert(matrix.m() == factors.size(),
3102  ExcDimensionMismatch(matrix.m(), factors.size()));
3103 
3104  for (unsigned int i = 0; i < matrix.local_m(); ++i)
3105  {
3106  const NumberType s = factors[matrix.global_row(i)];
3107 
3108  for (unsigned int j = 0; j < matrix.local_n(); ++j)
3109  matrix.local_el(i, j) *= s;
3110  }
3111  }
3112 
3113  } // namespace
3114 } // namespace internal
3115 
3116 
3117 
3118 template <typename NumberType>
3119 template <class InputVector>
3120 void
3121 ScaLAPACKMatrix<NumberType>::scale_columns(const InputVector &factors)
3122 {
3123  if (this->grid->mpi_process_is_active)
3124  internal::scale_columns(*this, make_array_view(factors));
3125 }
3126 
3127 
3128 
3129 template <typename NumberType>
3130 template <class InputVector>
3131 void
3132 ScaLAPACKMatrix<NumberType>::scale_rows(const InputVector &factors)
3133 {
3134  if (this->grid->mpi_process_is_active)
3135  internal::scale_rows(*this, make_array_view(factors));
3136 }
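// Usage sketch (illustration only; names are hypothetical): the two functions
// expect one factor per global column or row, i.e. they compute
// A <- A * diag(c) and A <- diag(r) * A, respectively.
//
//   std::vector<double> c(A.n(), 2.0), r(A.m(), 0.5);
//   A.scale_columns(c);
//   A.scale_rows(r);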
3137 
3138 
3139 
3140 // instantiations
3141 # include "scalapack.inst"
3142 
3143 
3144 DEAL_II_NAMESPACE_CLOSE
3145 
3146 #endif // DEAL_II_WITH_SCALAPACK