Reference documentation for deal.II version 9.1.0-pre
task_info.cc
1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2018 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 // ---------------------------------------------------------------------
15 
16 
17 #include <deal.II/base/conditional_ostream.h>
18 #include <deal.II/base/memory_consumption.h>
19 #include <deal.II/base/mpi.h>
20 #include <deal.II/base/multithread_info.h>
21 #include <deal.II/base/parallel.h>
22 #include <deal.II/base/utilities.h>
23 
24 #include <deal.II/matrix_free/task_info.h>
25 
26 
27 #ifdef DEAL_II_WITH_THREADS
28 # include <tbb/blocked_range.h>
29 # include <tbb/parallel_for.h>
30 # include <tbb/task.h>
31 # include <tbb/task_scheduler_init.h>
32 #endif
33 
34 #include <iostream>
35 #include <set>
36 
37 DEAL_II_NAMESPACE_OPEN
38 
39 
40 
41 /*-------------------- Implementation of the matrix-free loop --------------*/
42 namespace internal
43 {
44  namespace MatrixFreeFunctions
45  {
46 #ifdef DEAL_II_WITH_THREADS
47 
48  // This defines the TBB data structures that are needed to schedule the
49  // partition-partition variant
50 
51  namespace partition
52  {
53  class ActualCellWork
54  {
55  public:
56  ActualCellWork(MFWorkerInterface **worker_pointer,
57  const unsigned int partition,
58  const TaskInfo & task_info)
59  : worker(nullptr)
60  , worker_pointer(worker_pointer)
61  , partition(partition)
62  , task_info(task_info)
63  {}
64 
65  ActualCellWork(MFWorkerInterface &worker,
66  const unsigned int partition,
67  const TaskInfo & task_info)
68  : worker(&worker)
69  , worker_pointer(nullptr)
70  , partition(partition)
71  , task_info(task_info)
72  {}
73 
74  void
75  operator()() const
76  {
77  MFWorkerInterface *used_worker =
78  worker != nullptr ? worker : *worker_pointer;
79  Assert(used_worker != nullptr, ExcInternalError());
80  used_worker->cell(
81  std::make_pair(task_info.cell_partition_data[partition],
82  task_info.cell_partition_data[partition + 1]));
83 
84  if (task_info.face_partition_data.empty() == false)
85  {
86  used_worker->face(
87  std::make_pair(task_info.face_partition_data[partition],
88  task_info.face_partition_data[partition + 1]));
89 
90  used_worker->boundary(std::make_pair(
91  task_info.boundary_partition_data[partition],
92  task_info.boundary_partition_data[partition + 1]));
93  }
94  }
95 
96  private:
97  MFWorkerInterface * worker;
98  MFWorkerInterface **worker_pointer;
99  const unsigned int partition;
100  const TaskInfo & task_info;
101  };
102 
103  class CellWork : public tbb::task
104  {
105  public:
106  CellWork(MFWorkerInterface &worker,
107  const unsigned int partition,
108  const TaskInfo & task_info,
109  const bool is_blocked)
110  : dummy(nullptr)
111  , work(worker, partition, task_info)
112  , is_blocked(is_blocked)
113  {}
114 
115  tbb::task *
116  execute() override
117  {
118  work();
119 
120  if (is_blocked == true)
121  dummy->spawn(*dummy);
122  return nullptr;
123  }
124 
125  tbb::empty_task *dummy;
126 
127  private:
128  ActualCellWork work;
129  const bool is_blocked;
130  };
131 
132 
133 
134  class PartitionWork : public tbb::task
135  {
136  public:
137  PartitionWork(MFWorkerInterface &function_in,
138  const unsigned int partition_in,
139  const TaskInfo & task_info_in,
140  const bool is_blocked_in = false)
141  : dummy(nullptr)
142  , function(function_in)
143  , partition(partition_in)
144  , task_info(task_info_in)
145  , is_blocked(is_blocked_in)
146  {}
147 
148  tbb::task *
149  execute() override
150  {
151  tbb::empty_task *root =
152  new (tbb::task::allocate_root()) tbb::empty_task;
153  const unsigned int evens = task_info.partition_evens[partition];
154  const unsigned int odds = task_info.partition_odds[partition];
155  const unsigned int n_blocked_workers =
156  task_info.partition_n_blocked_workers[partition];
157  const unsigned int n_workers =
158  task_info.partition_n_workers[partition];
159  std::vector<CellWork *> worker(n_workers);
160  std::vector<CellWork *> blocked_worker(n_blocked_workers);
161 
162  root->set_ref_count(evens + 1);
163  for (unsigned int j = 0; j < evens; j++)
164  {
165  worker[j] = new (root->allocate_child())
166  CellWork(function,
167  task_info.partition_row_index[partition] + 2 * j,
168  task_info,
169  false);
170  if (j > 0)
171  {
172  worker[j]->set_ref_count(2);
173  blocked_worker[j - 1]->dummy =
174  new (worker[j]->allocate_child()) tbb::empty_task;
175  worker[j - 1]->spawn(*blocked_worker[j - 1]);
176  }
177  else
178  worker[j]->set_ref_count(1);
179  if (j < evens - 1)
180  {
181  blocked_worker[j] = new (worker[j]->allocate_child())
182  CellWork(function,
183  task_info.partition_row_index[partition] + 2 * j +
184  1,
185  task_info,
186  true);
187  }
188  else
189  {
190  if (odds == evens)
191  {
192  worker[evens] = new (worker[j]->allocate_child())
193  CellWork(function,
194  task_info.partition_row_index[partition] +
195  2 * j + 1,
196  task_info,
197  false);
198  worker[j]->spawn(*worker[evens]);
199  }
200  else
201  {
202  tbb::empty_task *child =
203  new (worker[j]->allocate_child()) tbb::empty_task();
204  worker[j]->spawn(*child);
205  }
206  }
207  }
208 
209  root->wait_for_all();
210  root->destroy(*root);
211  if (is_blocked == true)
212  dummy->spawn(*dummy);
213  return nullptr;
214  }
215 
216  tbb::empty_task *dummy;
217 
218  private:
219  MFWorkerInterface &function;
220  const unsigned int partition;
221  const TaskInfo & task_info;
222  const bool is_blocked;
223  };
224 
225  } // end of namespace partition
226 
227 
228 
229  namespace color
230  {
231  class CellWork
232  {
233  public:
234  CellWork(MFWorkerInterface &worker_in,
235  const TaskInfo & task_info_in,
236  const unsigned int partition_in)
237  : worker(worker_in)
238  , task_info(task_info_in)
239  , partition(partition_in)
240  {}
241 
242  void
243  operator()(const tbb::blocked_range<unsigned int> &r) const
244  {
245  const unsigned int start_index =
246  task_info.cell_partition_data[partition] +
247  task_info.block_size * r.begin();
248  const unsigned int end_index =
249  std::min(start_index + task_info.block_size * (r.end() - r.begin()),
250  task_info.cell_partition_data[partition + 1]);
251  worker.cell(std::make_pair(start_index, end_index));
252 
253  if (task_info.face_partition_data.empty() == false)
254  {
255  AssertThrow(false, ExcNotImplemented());
256  }
257  }
258 
259  private:
260  MFWorkerInterface &worker;
261  const TaskInfo & task_info;
262  const unsigned int partition;
263  };
264 
265 
266 
267  class PartitionWork : public tbb::task
268  {
269  public:
270  PartitionWork(MFWorkerInterface &worker_in,
271  const unsigned int partition_in,
272  const TaskInfo & task_info_in,
273  const bool is_blocked_in)
274  : dummy(nullptr)
275  , worker(worker_in)
276  , partition(partition_in)
277  , task_info(task_info_in)
278  , is_blocked(is_blocked_in)
279  {}
280 
281  tbb::task *
282  execute() override
283  {
284  const unsigned int n_chunks =
285  (task_info.cell_partition_data[partition + 1] -
286  task_info.cell_partition_data[partition] + task_info.block_size -
287  1) /
288  task_info.block_size;
289  parallel_for(tbb::blocked_range<unsigned int>(0, n_chunks, 1),
290  CellWork(worker, task_info, partition));
291  if (is_blocked == true)
292  dummy->spawn(*dummy);
293  return nullptr;
294  }
295 
296  tbb::empty_task *dummy;
297 
298  private:
299  MFWorkerInterface &worker;
300  const unsigned int partition;
301  const TaskInfo & task_info;
302  const bool is_blocked;
303  };
304 
305  } // end of namespace color
306 
307 
308 
309  class MPICommunication : public tbb::task
310  {
311  public:
312  MPICommunication(MFWorkerInterface &worker_in, const bool do_compress)
313  : worker(worker_in)
314  , do_compress(do_compress)
315  {}
316 
317  tbb::task *
318  execute() override
319  {
320  if (do_compress == false)
321  worker.vector_update_ghosts_finish();
322  else
323  worker.vector_compress_start();
324  return nullptr;
325  }
326 
327  private:
328  MFWorkerInterface &worker;
329  const bool do_compress;
330  };
331 
332 #endif // DEAL_II_WITH_THREADS
333 
334 
335 
336  void
338  {
340 
341 #ifdef DEAL_II_WITH_THREADS
342 
343  if (scheme != none)
344  {
346  if (scheme == partition_partition)
347  {
348  tbb::empty_task *root =
349  new (tbb::task::allocate_root()) tbb::empty_task;
350  root->set_ref_count(evens + 1);
351  std::vector<partition::PartitionWork *> worker(n_workers);
352  std::vector<partition::PartitionWork *> blocked_worker(
353  n_blocked_workers);
354  MPICommunication *worker_compr =
355  new (root->allocate_child()) MPICommunication(funct, true);
356  worker_compr->set_ref_count(1);
357  for (unsigned int j = 0; j < evens; j++)
358  {
359  if (j > 0)
360  {
361  worker[j] = new (root->allocate_child())
362  partition::PartitionWork(funct, 2 * j, *this, false);
363  worker[j]->set_ref_count(2);
364  blocked_worker[j - 1]->dummy =
365  new (worker[j]->allocate_child()) tbb::empty_task;
366  if (j > 1)
367  worker[j - 1]->spawn(*blocked_worker[j - 1]);
368  else
369  worker_compr->spawn(*blocked_worker[j - 1]);
370  }
371  else
372  {
373  worker[j] = new (worker_compr->allocate_child())
374  partition::PartitionWork(funct, 2 * j, *this, false);
375  worker[j]->set_ref_count(2);
376  MPICommunication *worker_dist =
377  new (worker[j]->allocate_child())
378  MPICommunication(funct, false);
379  worker_dist->spawn(*worker_dist);
380  }
381  if (j < evens - 1)
382  {
383  blocked_worker[j] = new (worker[j]->allocate_child())
384  partition::PartitionWork(funct, 2 * j + 1, *this, true);
385  }
386  else
387  {
388  if (odds == evens)
389  {
390  worker[evens] = new (worker[j]->allocate_child())
391  partition::PartitionWork(funct,
392  2 * j + 1,
393  *this,
394  false);
395  worker[j]->spawn(*worker[evens]);
396  }
397  else
398  {
399  tbb::empty_task *child =
400  new (worker[j]->allocate_child()) tbb::empty_task();
401  worker[j]->spawn(*child);
402  }
403  }
404  }
405 
406  root->wait_for_all();
407  root->destroy(*root);
408  }
409  else // end of partition-partition, start of partition-color
410  {
411  // check whether there is only one partition. if not, build up the
412  // tree of partitions
413  if (odds > 0)
414  {
415  tbb::empty_task *root =
416  new (tbb::task::allocate_root()) tbb::empty_task;
417  root->set_ref_count(evens + 1);
418  const unsigned int n_blocked_workers =
419  odds - (odds + evens + 1) % 2;
420  const unsigned int n_workers =
421  cell_partition_data.size() - 1 - n_blocked_workers;
422  std::vector<color::PartitionWork *> worker(n_workers);
423  std::vector<color::PartitionWork *> blocked_worker(
424  n_blocked_workers);
425  unsigned int worker_index = 0, slice_index = 0;
426  unsigned int spawn_index = 0;
427  int spawn_index_child = -2;
428  MPICommunication *worker_compr =
429  new (root->allocate_child()) MPICommunication(funct, true);
430  worker_compr->set_ref_count(1);
431  for (unsigned int part = 0;
432  part < partition_row_index.size() - 1;
433  part++)
434  {
435  const unsigned int spawn_index_new = worker_index;
436  if (part == 0)
437  worker[worker_index] =
438  new (worker_compr->allocate_child())
439  color::PartitionWork(funct,
440  slice_index,
441  *this,
442  false);
443  else
444  worker[worker_index] = new (root->allocate_child())
445  color::PartitionWork(funct,
446  slice_index,
447  *this,
448  false);
449  slice_index++;
450  for (; slice_index < partition_row_index[part + 1];
451  slice_index++)
452  {
453  worker[worker_index]->set_ref_count(1);
454  worker_index++;
455  worker[worker_index] =
456  new (worker[worker_index - 1]->allocate_child())
457  color::PartitionWork(funct,
458  slice_index,
459  *this,
460  false);
461  }
462  worker[worker_index]->set_ref_count(2);
463  if (part > 0)
464  {
465  blocked_worker[(part - 1) / 2]->dummy =
466  new (worker[worker_index]->allocate_child())
467  tbb::empty_task;
468  worker_index++;
469  if (spawn_index_child == -1)
470  worker[spawn_index]->spawn(
471  *blocked_worker[(part - 1) / 2]);
472  else
473  {
474  Assert(spawn_index_child >= 0,
475  ExcInternalError());
476  worker[spawn_index]->spawn(
477  *worker[spawn_index_child]);
478  }
479  spawn_index = spawn_index_new;
480  spawn_index_child = -2;
481  }
482  else
483  {
484  MPICommunication *worker_dist =
485  new (worker[worker_index]->allocate_child())
486  MPICommunication(funct, false);
487  worker_dist->spawn(*worker_dist);
488  worker_index++;
489  }
490  part += 1;
491  if (part < partition_row_index.size() - 1)
492  {
493  if (part < partition_row_index.size() - 2)
494  {
495  blocked_worker[part / 2] =
496  new (worker[worker_index - 1]->allocate_child())
497  color::PartitionWork(funct,
498  slice_index,
499  *this,
500  true);
501  slice_index++;
502  if (slice_index < partition_row_index[part + 1])
503  {
504  blocked_worker[part / 2]->set_ref_count(1);
505  worker[worker_index] = new (
506  blocked_worker[part / 2]->allocate_child())
507  color::PartitionWork(funct,
508  slice_index,
509  *this,
510  false);
511  slice_index++;
512  }
513  else
514  {
515  spawn_index_child = -1;
516  continue;
517  }
518  }
519  for (; slice_index < partition_row_index[part + 1];
520  slice_index++)
521  {
522  if (slice_index > partition_row_index[part])
523  {
524  worker[worker_index]->set_ref_count(1);
525  worker_index++;
526  }
527  worker[worker_index] =
528  new (worker[worker_index - 1]->allocate_child())
529  color::PartitionWork(funct,
530  slice_index,
531  *this,
532  false);
533  }
534  spawn_index_child = worker_index;
535  worker_index++;
536  }
537  else
538  {
539  tbb::empty_task *final =
540  new (worker[worker_index - 1]->allocate_child())
541  tbb::empty_task;
542  worker[spawn_index]->spawn(*final);
543  spawn_index_child = worker_index - 1;
544  }
545  }
546  if (evens == odds)
547  {
548  Assert(spawn_index_child >= 0, ExcInternalError());
549  worker[spawn_index]->spawn(*worker[spawn_index_child]);
550  }
551  root->wait_for_all();
552  root->destroy(*root);
553  }
554  // case when we only have one partition: this is the usual
555  // coloring scheme, and we just schedule a parallel for loop for
556  // each color
557  else
558  {
559  Assert(evens <= 1, ExcInternalError());
561 
562  for (unsigned int color = 0; color < partition_row_index[1];
563  ++color)
564  {
565  tbb::empty_task *root =
566  new (tbb::task::allocate_root()) tbb::empty_task;
567  root->set_ref_count(2);
568  color::PartitionWork *worker =
569  new (root->allocate_child())
570  color::PartitionWork(funct, color, *this, false);
571  root->spawn(*worker);
572  root->wait_for_all();
573  root->destroy(*root);
574  }
575 
576  funct.vector_compress_start();
577  }
578  }
579  }
580  else
581 #endif
582  // serial loop, go through up to three times and do the MPI transfer at
583  // the beginning/end of the second part
584  {
585  for (unsigned int part = 0; part < partition_row_index.size() - 2;
586  ++part)
587  {
588  if (part == 1)
590 
591  for (unsigned int i = partition_row_index[part];
592  i < partition_row_index[part + 1];
593  ++i)
594  {
595  AssertIndexRange(i + 1, cell_partition_data.size());
596  if (cell_partition_data[i + 1] > cell_partition_data[i])
597  {
598  funct.zero_dst_vector_range(i);
599  funct.cell(std::make_pair(cell_partition_data[i],
600  cell_partition_data[i + 1]));
601  }
602 
603  if (face_partition_data.empty() == false)
604  {
605  if (face_partition_data[i + 1] > face_partition_data[i])
606  funct.face(std::make_pair(face_partition_data[i],
607  face_partition_data[i + 1]));
608  if (boundary_partition_data[i + 1] >
609  boundary_partition_data[i])
610  funct.boundary(
611  std::make_pair(boundary_partition_data[i],
612  boundary_partition_data[i + 1]));
613  }
614  }
615 
616  if (part == 1)
617  funct.vector_compress_start();
618  }
619  }
620  funct.vector_compress_finish();
621  }
622 
623 
624 
626  {
627  clear();
628  }
629 
630 
631 
632  void
634  {
635  n_active_cells = 0;
636  n_ghost_cells = 0;
637  vectorization_length = 1;
638  block_size = 0;
639  n_blocks = 0;
640  scheme = none;
641  partition_row_index.clear();
642  cell_partition_data.clear();
643  face_partition_data.clear();
644  boundary_partition_data.clear();
645  evens = 0;
646  odds = 0;
647  n_blocked_workers = 0;
648  n_workers = 0;
649  partition_evens.clear();
650  partition_odds.clear();
651  partition_n_blocked_workers.clear();
652  partition_n_workers.clear();
653  communicator = MPI_COMM_SELF;
654  my_pid = 0;
655  n_procs = 1;
656  }
657 
658 
659 
660  template <typename StreamType>
661  void
663  const std::size_t data_length) const
664  {
665  Utilities::MPI::MinMaxAvg memory_c =
666  Utilities::MPI::min_max_avg(1e-6 * data_length, communicator);
667  if (n_procs < 2)
668  out << memory_c.min;
669  else
670  out << memory_c.min << "/" << memory_c.avg << "/" << memory_c.max;
671  out << " MB" << std::endl;
672  }
673 
674 
675 
676  std::size_t
678  {
679  return (
680  sizeof(*this) +
681  MemoryConsumption::memory_consumption(partition_row_index) +
682  MemoryConsumption::memory_consumption(cell_partition_data) +
683  MemoryConsumption::memory_consumption(face_partition_data) +
684  MemoryConsumption::memory_consumption(boundary_partition_data) +
685  MemoryConsumption::memory_consumption(partition_evens) +
686  MemoryConsumption::memory_consumption(partition_odds) +
687  MemoryConsumption::memory_consumption(partition_n_blocked_workers) +
688  MemoryConsumption::memory_consumption(partition_n_workers));
689  }
690 
691 
692 
693  void
695  const unsigned int n_active_cells_in,
696  const unsigned int n_active_and_ghost_cells,
697  const unsigned int vectorization_length_in,
698  std::vector<unsigned int> &boundary_cells)
699  {
700  vectorization_length = vectorization_length_in;
701  n_active_cells = n_active_cells_in;
702  n_ghost_cells = n_active_and_ghost_cells - n_active_cells;
703 
704  // try to make the number of boundary cells divisible by the number of
705  // vectors in vectorization
706  unsigned int fillup_needed =
707  (vectorization_length - boundary_cells.size() % vectorization_length) %
708  vectorization_length;
709  if (fillup_needed > 0 && boundary_cells.size() < n_active_cells)
710  {
711  // fill additional cells into the list of boundary cells to get a
712  // balanced number. Go through the indices successively until we
713  // found enough indices
714  std::vector<unsigned int> new_boundary_cells;
715  new_boundary_cells.reserve(boundary_cells.size());
716 
717  unsigned int next_free_slot = 0, bound_index = 0;
718  while (fillup_needed > 0 && bound_index < boundary_cells.size())
719  {
720  if (next_free_slot < boundary_cells[bound_index])
721  {
722  // check if there are enough cells to fill with in the
723  // current slot
724  if (next_free_slot + fillup_needed <=
725  boundary_cells[bound_index])
726  {
727  for (unsigned int j =
728  boundary_cells[bound_index] - fillup_needed;
729  j < boundary_cells[bound_index];
730  ++j)
731  new_boundary_cells.push_back(j);
732  fillup_needed = 0;
733  }
734  // ok, not enough indices, so just take them all up to the
735  // next boundary cell
736  else
737  {
738  for (unsigned int j = next_free_slot;
739  j < boundary_cells[bound_index];
740  ++j)
741  new_boundary_cells.push_back(j);
742  fillup_needed -=
743  boundary_cells[bound_index] - next_free_slot;
744  }
745  }
746  new_boundary_cells.push_back(boundary_cells[bound_index]);
747  next_free_slot = boundary_cells[bound_index] + 1;
748  ++bound_index;
749  }
750  while (fillup_needed > 0 &&
751  (new_boundary_cells.size() == 0 ||
752  new_boundary_cells.back() < n_active_cells - 1))
753  new_boundary_cells.push_back(new_boundary_cells.back() + 1);
754  while (bound_index < boundary_cells.size())
755  new_boundary_cells.push_back(boundary_cells[bound_index++]);
756 
757  boundary_cells.swap(new_boundary_cells);
758  }
759 
760  // set the number of cells
761  std::sort(boundary_cells.begin(), boundary_cells.end());
762 
763  // check that number of boundary cells is divisible by
764  // vectorization_length or that it contains all cells
765  Assert(boundary_cells.size() % vectorization_length == 0 ||
766  boundary_cells.size() == n_active_cells,
767  ExcInternalError());
768  }
769 
770 
771 
772  void
774  const std::vector<unsigned int> &boundary_cells,
775  const std::vector<unsigned int> &cells_close_to_boundary,
776  const unsigned int dofs_per_cell,
777  const std::vector<unsigned int> &cell_vectorization_categories,
778  const bool cell_vectorization_categories_strict,
779  std::vector<unsigned int> & renumbering,
780  std::vector<unsigned char> & incompletely_filled_vectorization)
781  {
782  const unsigned int n_macro_cells =
783  (n_active_cells + vectorization_length - 1) / vectorization_length;
784  const unsigned int n_ghost_slots =
785  (n_ghost_cells + vectorization_length - 1) / vectorization_length;
786  const unsigned int n_boundary_cells = boundary_cells.size();
787 
788  incompletely_filled_vectorization.resize(n_macro_cells + n_ghost_slots);
789  renumbering.resize(n_active_cells + n_ghost_cells,
791 
792  // Define the outer number of partitions. In the MPI case, we have three
793  // partitions (part before comm, part with comm, part after comm)
794  if (n_procs == 1)
795  partition_row_index.resize(3);
796  else
797  partition_row_index.resize(5);
798 
799  // Initially mark the cells according to the MPI ranking
800  std::vector<unsigned char> cell_marked(n_active_cells + n_ghost_cells, 0);
801  if (n_procs > 1)
802  {
803  for (unsigned int i = 0; i < n_boundary_cells; ++i)
804  cell_marked[boundary_cells[i]] = 2;
805 
806  Assert(boundary_cells.size() % vectorization_length == 0 ||
807  boundary_cells.size() == n_active_cells,
808  ExcInternalError());
809 
810  const unsigned int n_second_slot =
811  ((n_active_cells - n_boundary_cells) / 2 / vectorization_length) *
812  vectorization_length;
813  unsigned int count = 0;
814  for (unsigned int i = 0; i < cells_close_to_boundary.size(); ++i)
815  if (cell_marked[cells_close_to_boundary[i]] == 0)
816  {
817  cell_marked[cells_close_to_boundary[i]] =
818  count < n_second_slot ? 1 : 3;
819  ++count;
820  }
821 
822  unsigned int c = 0;
823  for (; c < n_active_cells && count < n_second_slot; ++c)
824  if (cell_marked[c] == 0)
825  {
826  cell_marked[c] = 1;
827  ++count;
828  }
829  for (; c < n_active_cells; ++c)
830  if (cell_marked[c] == 0)
831  cell_marked[c] = 3;
832  for (; c < n_active_cells + n_ghost_cells; ++c)
833  if (cell_marked[c] == 0)
834  cell_marked[c] = 4;
835  }
836  else
837  std::fill(cell_marked.begin(), cell_marked.end(), 1);
838 
839  for (unsigned int i = 0; i < cell_marked.size(); ++i)
840  Assert(cell_marked[i] != 0, ExcInternalError());
841 
842  unsigned int n_categories = 1;
843  std::vector<unsigned int> tight_category_map;
844  if (cell_vectorization_categories.empty() == false)
845  {
846  AssertDimension(cell_vectorization_categories.size(),
847  n_active_cells + n_ghost_cells);
848 
849  // create a tight map of categories for not taking exceeding amounts
850  // of memory below. Sort the new categories by the numbers in the
851  // old one.
852  tight_category_map.resize(n_active_cells + n_ghost_cells);
853  std::set<unsigned int> used_categories;
854  for (unsigned int i = 0; i < n_active_cells + n_ghost_cells; ++i)
855  used_categories.insert(cell_vectorization_categories[i]);
856  std::vector<unsigned int> used_categories_vector(
857  used_categories.size());
858  n_categories = 0;
859  for (auto &it : used_categories)
860  used_categories_vector[n_categories++] = it;
861  for (unsigned int i = 0; i < n_active_cells + n_ghost_cells; ++i)
862  {
863  const unsigned int index =
864  std::lower_bound(used_categories_vector.begin(),
865  used_categories_vector.end(),
866  cell_vectorization_categories[i]) -
867  used_categories_vector.begin();
868  AssertIndexRange(index, used_categories_vector.size());
869  tight_category_map[i] = index;
870  }
871 
872  // leave some more space for empty lanes
873  incompletely_filled_vectorization.resize(
874  incompletely_filled_vectorization.size() + 4 * n_categories);
875  }
876  else if (cells_close_to_boundary.empty())
877  tight_category_map.resize(n_active_cells + n_ghost_cells, 0);
878  else
879  {
880  n_categories = 2;
881  tight_category_map.resize(n_active_cells + n_ghost_cells, 1);
882  for (unsigned int i = 0; i < cells_close_to_boundary.size(); ++i)
883  tight_category_map[cells_close_to_boundary[i]] = 0;
884  }
885 
886  cell_partition_data.clear();
887  cell_partition_data.resize(1, 0);
888  unsigned int counter = 0;
889  unsigned int n_cells = 0;
890  std::vector<std::vector<unsigned int>> renumbering_category(n_categories);
891  for (unsigned int block = 1; block < (n_procs > 1u ? 5u : 3u); ++block)
892  {
893  // step 1: sort by category
894  for (unsigned int i = 0; i < n_active_cells + n_ghost_cells; ++i)
895  if (cell_marked[i] == block)
896  renumbering_category[tight_category_map[i]].push_back(i);
897 
898  // step 2: if we want to fill up the ranges in vectorization, promote
899  // some of the cells to a higher category
900  if (cell_vectorization_categories_strict == false && n_categories > 1)
901  for (unsigned int j = n_categories - 1; j > 0; --j)
902  {
903  unsigned int lower_index = j - 1;
904  while (renumbering_category[j].size() % vectorization_length)
905  {
906  while (renumbering_category[j].size() %
907  vectorization_length &&
908  !renumbering_category[lower_index].empty())
909  {
910  renumbering_category[j].push_back(
911  renumbering_category[lower_index].back());
912  renumbering_category[lower_index].pop_back();
913  }
914  if (lower_index == 0)
915  break;
916  else
917  --lower_index;
918  }
919  }
920 
921  // step 3: append cells according to categories
922  for (unsigned int j = 0; j < n_categories; ++j)
923  {
924  for (unsigned int jj = 0; jj < renumbering_category[j].size();
925  jj++)
926  renumbering[counter++] = renumbering_category[j][jj];
927  unsigned int remainder =
928  renumbering_category[j].size() % vectorization_length;
929  if (remainder)
930  incompletely_filled_vectorization
931  [renumbering_category[j].size() / vectorization_length +
932  n_cells] = remainder;
933  const unsigned int n_my_macro_cells =
934  (renumbering_category[j].size() + vectorization_length - 1) /
935  vectorization_length;
936  renumbering_category[j].clear();
937 
938  // step 4: create blocks for face integrals, make the number of
939  // cells divisible by 4 if possible
940  const unsigned int block_size =
941  std::max((2048U / dofs_per_cell) / 8 * 4, 2U);
942  if (block < 4)
943  for (unsigned int k = 0; k < n_my_macro_cells; k += block_size)
944  cell_partition_data.push_back(
945  n_cells + std::min(k + block_size, n_my_macro_cells));
946  else
947  cell_partition_data.back() += n_my_macro_cells;
948  n_cells += n_my_macro_cells;
949  }
950  partition_row_index[block] = cell_partition_data.size() - 1;
951  if (block == 3 || (block == 1 && n_procs == 1))
952  cell_partition_data.push_back(n_cells);
953  }
954  if (cell_vectorization_categories_strict == true)
955  {
956  Assert(n_cells >= n_macro_cells + n_ghost_slots, ExcInternalError());
957  }
958  else
959  {
960  AssertDimension(n_cells, n_macro_cells + n_ghost_slots);
961  }
962  AssertDimension(cell_partition_data.back(), n_cells);
963  AssertDimension(counter, n_active_cells + n_ghost_cells);
964 
965  incompletely_filled_vectorization.resize(cell_partition_data.back());
966  }
967 
968 
969 
970  void
972  const std::vector<unsigned int> &boundary_cells,
973  std::vector<unsigned int> & renumbering,
974  std::vector<unsigned char> & incompletely_filled_vectorization)
975  {
976  const unsigned int n_macro_cells =
977  (n_active_cells + vectorization_length - 1) / vectorization_length;
978  const unsigned int n_ghost_slots =
979  (n_ghost_cells + vectorization_length - 1) / vectorization_length;
980  incompletely_filled_vectorization.resize(n_macro_cells + n_ghost_slots);
981  if (n_macro_cells * vectorization_length > n_active_cells)
982  incompletely_filled_vectorization[n_macro_cells - 1] =
983  vectorization_length -
984  (n_macro_cells * vectorization_length - n_active_cells);
985  if (n_ghost_slots * vectorization_length > n_ghost_cells)
986  incompletely_filled_vectorization[n_macro_cells + n_ghost_slots - 1] =
987  vectorization_length -
988  (n_ghost_slots * vectorization_length - n_ghost_cells);
989 
990  std::vector<unsigned int> reverse_numbering(
991  n_active_cells, numbers::invalid_unsigned_int);
992  for (unsigned int j = 0; j < boundary_cells.size(); ++j)
993  reverse_numbering[boundary_cells[j]] = j;
994  unsigned int counter = boundary_cells.size();
995  for (unsigned int j = 0; j < n_active_cells; ++j)
996  if (reverse_numbering[j] == numbers::invalid_unsigned_int)
997  reverse_numbering[j] = counter++;
998 
999  AssertDimension(counter, n_active_cells);
1000  renumbering = Utilities::invert_permutation(reverse_numbering);
1001 
1002  for (unsigned int j = n_active_cells; j < n_active_cells + n_ghost_cells;
1003  ++j)
1004  renumbering.push_back(j);
1005 
1006  // TODO: might be able to simplify this code by not relying on the cell
1007  // partition data while computing the thread graph
1008  cell_partition_data.clear();
1009  cell_partition_data.push_back(0);
1010  if (n_procs > 1)
1011  {
1012  const unsigned int n_macro_boundary_cells =
1013  (boundary_cells.size() + vectorization_length - 1) /
1014  vectorization_length;
1015  cell_partition_data.push_back(
1016  (n_macro_cells - n_macro_boundary_cells) / 2);
1017  cell_partition_data.push_back(cell_partition_data[1] +
1018  n_macro_boundary_cells);
1019  }
1020  else
1021  AssertDimension(boundary_cells.size(), 0);
1022  cell_partition_data.push_back(n_macro_cells);
1023  cell_partition_data.push_back(cell_partition_data.back() + n_ghost_slots);
1024  partition_row_index.resize(n_procs > 1 ? 4 : 2);
1025  partition_row_index[0] = 0;
1026  partition_row_index[1] = 1;
1027  if (n_procs > 1)
1028  {
1029  partition_row_index[2] = 2;
1030  partition_row_index[3] = 3;
1031  }
1032  }
1033 
1034 
1035 
1036  void
1037  TaskInfo::guess_block_size(const unsigned int dofs_per_cell)
1038  {
1039  // user did not say a positive number, so we have to guess
1040  if (block_size == 0)
1041  {
1042  // we would like to have enough work to do, so as first guess, try
1043  // to get 16 times as many chunks as we have threads on the system.
1044  block_size = n_active_cells / (MultithreadInfo::n_threads() * 16 *
1045  vectorization_length);
1046 
1047  // if there are too few degrees of freedom per cell, need to
1048  // increase the block size
1049  const unsigned int minimum_parallel_grain_size = 200;
1050  if (dofs_per_cell * block_size < minimum_parallel_grain_size)
1051  block_size = (minimum_parallel_grain_size / dofs_per_cell + 1);
1052  if (dofs_per_cell * block_size > 10000)
1053  block_size /= 4;
1054 
1055  block_size = 1 << (unsigned int)(log2(block_size + 1));
1056  }
1057  if (block_size > n_active_cells)
1058  block_size = std::max(1U, n_active_cells);
1059  }
1060 
1061 
1062 
1063  void
1065  DynamicSparsityPattern & connectivity_large,
1066  std::vector<unsigned int> & renumbering,
1067  std::vector<unsigned char> &irregular_cells,
1068  const bool)
1069  {
1070  const unsigned int n_macro_cells = *(cell_partition_data.end() - 2);
1071  if (n_macro_cells == 0)
1072  return;
1073 
1074  Assert(vectorization_length > 0, ExcInternalError());
1075 
1076  unsigned int partition = 0, counter = 0;
1077 
1078  // Create connectivity graph for blocks based on connectivity graph for
1079  // cells.
1080  DynamicSparsityPattern connectivity(n_blocks, n_blocks);
1081  make_connectivity_cells_to_blocks(irregular_cells,
1082  connectivity_large,
1083  connectivity);
1084 
1085  // Create cell-block partitioning.
1086 
1087  // For each block of cells, this variable saves to which partitions the
1088  // block belongs. Initialize all to -1 to mark them as not yet assigned
1089  // a partition.
1090  std::vector<unsigned int> cell_partition(n_blocks,
1092 
1093  // In element j of this variable, one puts the old number of the block
1094  // that should be the jth block in the new numeration.
1095  std::vector<unsigned int> partition_list(n_blocks, 0);
1096  std::vector<unsigned int> partition_color_list(n_blocks, 0);
1097 
1098  // This vector points to the start of each partition.
1099  std::vector<unsigned int> partition_size(2, 0);
1100 
1101  // blocking_connectivity = true;
1102 
1103  // The cluster_size in make_partitioning defines that the no. of cells
1104  // in each partition should be a multiple of cluster_size.
1105  unsigned int cluster_size = 1;
1106 
1107  // Make the partitioning of the first layer of the blocks of cells.
1108  make_partitioning(connectivity,
1109  cluster_size,
1110  cell_partition,
1111  partition_list,
1112  partition_size,
1113  partition);
1114 
1115  // Color the cells within each partition
1116  make_coloring_within_partitions_pre_blocked(connectivity,
1117  partition,
1118  cell_partition,
1119  partition_list,
1120  partition_size,
1121  partition_color_list);
1122 
1123  partition_list = renumbering;
1124 
1125 #ifdef DEBUG
1126  // in debug mode, check that the partition color list is one-to-one
1127  {
1128  std::vector<unsigned int> sorted_pc_list(partition_color_list);
1129  std::sort(sorted_pc_list.begin(), sorted_pc_list.end());
1130  for (unsigned int i = 0; i < sorted_pc_list.size(); ++i)
1131  Assert(sorted_pc_list[i] == i, ExcInternalError());
1132  }
1133 #endif
1134 
1135  // set the start list for each block and compute the renumbering of
1136  // cells
1137  std::vector<unsigned int> block_start(n_macro_cells + 1);
1138  std::vector<unsigned char> irregular(n_macro_cells);
1139 
1140  unsigned int mcell_start = 0;
1141  block_start[0] = 0;
1142  for (unsigned int block = 0; block < n_blocks; block++)
1143  {
1144  block_start[block + 1] = block_start[block];
1145  for (unsigned int mcell = mcell_start;
1146  mcell < std::min(mcell_start + block_size, n_macro_cells);
1147  ++mcell)
1148  {
1149  unsigned int n_comp = (irregular_cells[mcell] > 0) ?
1150  irregular_cells[mcell] :
1151  vectorization_length;
1152  block_start[block + 1] += n_comp;
1153  ++counter;
1154  }
1155  mcell_start += block_size;
1156  }
1157  counter = 0;
1158  unsigned int counter_macro = 0;
1159  unsigned int block_size_last =
1160  n_macro_cells - block_size * (n_blocks - 1);
1161  if (block_size_last == 0)
1162  block_size_last = block_size;
1163 
1164  unsigned int tick = 0;
1165  for (unsigned int block = 0; block < n_blocks; block++)
1166  {
1167  unsigned int present_block = partition_color_list[block];
1168  for (unsigned int cell = block_start[present_block];
1169  cell < block_start[present_block + 1];
1170  ++cell)
1171  renumbering[counter++] = partition_list[cell];
1172  unsigned int this_block_size =
1173  (present_block == n_blocks - 1) ? block_size_last : block_size;
1174 
1175  // Also re-compute the content of cell_partition_data to
1176  // contain the numbers of cells, not blocks
1177  if (cell_partition_data[tick] == block)
1178  cell_partition_data[tick++] = counter_macro;
1179 
1180  for (unsigned int j = 0; j < this_block_size; j++)
1181  irregular[counter_macro++] =
1182  irregular_cells[present_block * block_size + j];
1183  }
1184  AssertDimension(tick + 1, cell_partition_data.size());
1185  cell_partition_data.back() = counter_macro;
1186 
1187  irregular_cells.swap(irregular);
1188  AssertDimension(counter, n_active_cells);
1189  AssertDimension(counter_macro, n_macro_cells);
1190 
1191  // check that the renumbering is one-to-one
1192 #ifdef DEBUG
1193  {
1194  std::vector<unsigned int> sorted_renumbering(renumbering);
1195  std::sort(sorted_renumbering.begin(), sorted_renumbering.end());
1196  for (unsigned int i = 0; i < sorted_renumbering.size(); ++i)
1197  Assert(sorted_renumbering[i] == i, ExcInternalError());
1198  }
1199 #endif
1200 
1201 
1202  update_task_info(
1203  partition); // Actually sets too much for partition color case
1204 
1205  AssertDimension(cell_partition_data.back(), n_macro_cells);
1206  }
1207 
1208 
1209 
1210  void
1212  const std::vector<unsigned int> &cell_active_fe_index,
1213  DynamicSparsityPattern & connectivity,
1214  std::vector<unsigned int> & renumbering,
1215  std::vector<unsigned char> & irregular_cells,
1216  const bool hp_bool)
1217  {
1218  const unsigned int n_macro_cells = *(cell_partition_data.end() - 2);
1219  if (n_macro_cells == 0)
1220  return;
1221 
1222  Assert(vectorization_length > 0, ExcInternalError());
1223 
1224  // if we want to block before partitioning, create connectivity graph
1225  // for blocks based on connectivity graph for cells.
1226  DynamicSparsityPattern connectivity_blocks(n_blocks, n_blocks);
1227  make_connectivity_cells_to_blocks(irregular_cells,
1228  connectivity,
1229  connectivity_blocks);
1230 
1231  unsigned int n_blocks = 0;
1232  if (scheme == partition_color ||
1233  scheme == color) // blocking_connectivity == true
1234  n_blocks = this->n_blocks;
1235  else
1236  n_blocks = n_active_cells;
1237 
1238  // For each block of cells, this variable saves to which partitions the
1239  // block belongs. Initialize all to -1 to mark them as not yet assigned
1240  // a partition.
1241  std::vector<unsigned int> cell_partition(n_blocks,
1243 
1244  // In element j of this variable, one puts the old number (but after
1245  // renumbering according to the input renumbering) of the block that
1246  // should be the jth block in the new numeration.
1247  std::vector<unsigned int> partition_list(n_blocks, 0);
1248  std::vector<unsigned int> partition_2layers_list(n_blocks, 0);
1249 
1250  // This vector points to the start of each partition.
1251  std::vector<unsigned int> partition_size(2, 0);
1252 
1253  unsigned int partition = 0;
1254 
1255  // Within the partitions we want to be able to block for the case that
1256  // we do not block already in the connectivity. The cluster_size in
1257  // make_partitioning defines that the no. of cells in each partition
1258  // should be a multiple of cluster_size.
1259  unsigned int cluster_size = 1;
1260  if (scheme == partition_partition)
1261  cluster_size = block_size * vectorization_length;
1262 
1263  // Make the partitioning of the first layer of the blocks of cells.
1264  if (scheme == partition_color || scheme == color)
1265  make_partitioning(connectivity_blocks,
1266  cluster_size,
1267  cell_partition,
1268  partition_list,
1269  partition_size,
1270  partition);
1271  else
1272  make_partitioning(connectivity,
1273  cluster_size,
1274  cell_partition,
1275  partition_list,
1276  partition_size,
1277  partition);
1278 
1279  // Partition or color second layer
1280  if (scheme == partition_partition)
1281 
1282  {
1283  // Partition within partitions.
1284  make_partitioning_within_partitions_post_blocked(
1285  connectivity,
1286  cell_active_fe_index,
1287  partition,
1288  cluster_size,
1289  hp_bool,
1290  cell_partition,
1291  partition_list,
1292  partition_size,
1293  partition_2layers_list,
1294  irregular_cells);
1295  }
1296  else if (scheme == partition_color || scheme == color)
1297  {
1298  make_coloring_within_partitions_pre_blocked(connectivity_blocks,
1299  partition,
1300  cell_partition,
1301  partition_list,
1302  partition_size,
1303  partition_2layers_list);
1304  }
1305 
1306  // in debug mode, check that the partition_2layers_list is one-to-one
1307 #ifdef DEBUG
1308  {
1309  std::vector<unsigned int> sorted_pc_list(partition_2layers_list);
1310  std::sort(sorted_pc_list.begin(), sorted_pc_list.end());
1311  for (unsigned int i = 0; i < sorted_pc_list.size(); ++i)
1312  Assert(sorted_pc_list[i] == i, ExcInternalError());
1313  }
1314 #endif
1315 
1316  // Set the new renumbering
1317  std::vector<unsigned int> renumbering_in(n_active_cells, 0);
1318  renumbering_in.swap(renumbering);
1319  if (scheme == partition_partition) // blocking_connectivity == false
1320  {
1321  // This is the simple case. The renumbering is just a combination of
1322  // the renumbering that we were given as an input and the
1323  // renumbering of partition/coloring given in partition_2layers_list
1324  for (unsigned int j = 0; j < renumbering.size(); j++)
1325  renumbering[j] = renumbering_in[partition_2layers_list[j]];
1326  // Account for the ghost cells, finally.
1327  for (unsigned int i = 0; i < n_ghost_cells; ++i)
1328  renumbering.push_back(i + n_active_cells);
1329  }
1330  else
1331  {
1332  // set the start list for each block and compute the renumbering of
1333  // cells
1334  std::vector<unsigned int> block_start(n_macro_cells + 1);
1335  std::vector<unsigned char> irregular(n_macro_cells);
1336 
1337  unsigned int counter = 0;
1338  unsigned int mcell_start = 0;
1339  block_start[0] = 0;
1340  for (unsigned int block = 0; block < n_blocks; block++)
1341  {
1342  block_start[block + 1] = block_start[block];
1343  for (unsigned int mcell = mcell_start;
1344  mcell < std::min(mcell_start + block_size, n_macro_cells);
1345  ++mcell)
1346  {
1347  unsigned int n_comp = (irregular_cells[mcell] > 0) ?
1348  irregular_cells[mcell] :
1349  vectorization_length;
1350  block_start[block + 1] += n_comp;
1351  ++counter;
1352  }
1353  mcell_start += block_size;
1354  }
1355  counter = 0;
1356  unsigned int counter_macro = 0;
1357  unsigned int block_size_last =
1358  n_macro_cells - block_size * (n_blocks - 1);
1359  if (block_size_last == 0)
1360  block_size_last = block_size;
1361 
1362  unsigned int tick = 0;
1363  for (unsigned int block = 0; block < n_blocks; block++)
1364  {
1365  unsigned int present_block = partition_2layers_list[block];
1366  for (unsigned int cell = block_start[present_block];
1367  cell < block_start[present_block + 1];
1368  ++cell)
1369  renumbering[counter++] = renumbering_in[cell];
1370  unsigned int this_block_size =
1371  (present_block == n_blocks - 1) ? block_size_last : block_size;
1372 
1373  // Also re-compute the content of cell_partition_data to
1374  // contain the numbers of cells, not blocks
1375  if (cell_partition_data[tick] == block)
1376  cell_partition_data[tick++] = counter_macro;
1377 
1378  for (unsigned int j = 0; j < this_block_size; j++)
1379  irregular[counter_macro++] =
1380  irregular_cells[present_block * block_size + j];
1381  }
1382  AssertDimension(tick + 1, cell_partition_data.size());
1383  cell_partition_data.back() = counter_macro;
1384 
1385  irregular_cells.swap(irregular);
1386  AssertDimension(counter, n_active_cells);
1387  AssertDimension(counter_macro, n_macro_cells);
1388  // check that the renumbering is one-to-one
1389 #ifdef DEBUG
1390  {
1391  std::vector<unsigned int> sorted_renumbering(renumbering);
1392  std::sort(sorted_renumbering.begin(), sorted_renumbering.end());
1393  for (unsigned int i = 0; i < sorted_renumbering.size(); ++i)
1394  Assert(sorted_renumbering[i] == i, ExcInternalError());
1395  }
1396 #endif
1397  }
1398 
1399  // Update the task_info with the more information for the thread graph.
1400  update_task_info(partition);
1401  }
1402 
1403 
1404 
1405  void
1407  const std::vector<unsigned int> &cell_active_fe_index,
1408  DynamicSparsityPattern & connectivity,
1409  std::vector<unsigned int> & renumbering,
1410  std::vector<unsigned char> & irregular_cells,
1411  const bool hp_bool)
1412  {
1413  const unsigned int n_macro_cells = *(cell_partition_data.end() - 2);
1414  if (n_macro_cells == 0)
1415  return;
1416 
1417  const unsigned int cluster_size = block_size * vectorization_length;
1418 
1419  // Create cell-block partitioning.
1420 
1421  // For each block of cells, this variable saves to which partitions the
1422  // block belongs. Initialize all to n_macro_cells to mark them as not
1423  // yet assigned a partition.
1424  std::vector<unsigned int> cell_partition(n_active_cells,
1426 
1427 
1428  // In element j of this variable, one puts the old number of the block
1429  // that should be the jth block in the new numeration.
1430  std::vector<unsigned int> partition_list(n_active_cells, 0);
1431  std::vector<unsigned int> partition_partition_list(n_active_cells, 0);
1432 
1433  // This vector points to the start of each partition.
1434  std::vector<unsigned int> partition_size(2, 0);
1435 
1436  unsigned int partition = 0;
1437  // Here, we do not block inside the connectivity graph
1438  // blocking_connectivity = false;
1439 
1440  // Make the partitioning of the first layer of the blocks of cells.
1441  make_partitioning(connectivity,
1442  cluster_size,
1443  cell_partition,
1444  partition_list,
1445  partition_size,
1446  partition);
1447 
1448  // Partition within partitions.
1449  make_partitioning_within_partitions_post_blocked(connectivity,
1450  cell_active_fe_index,
1451  partition,
1452  cluster_size,
1453  hp_bool,
1454  cell_partition,
1455  partition_list,
1456  partition_size,
1457  partition_partition_list,
1458  irregular_cells);
1459 
1460  partition_list.swap(renumbering);
1461 
1462  for (unsigned int j = 0; j < renumbering.size(); j++)
1463  renumbering[j] = partition_list[partition_partition_list[j]];
1464 
1465  for (unsigned int i = 0; i < n_ghost_cells; ++i)
1466  renumbering.push_back(i + n_active_cells);
1467 
1468  update_task_info(partition);
1469  }
1470 
1471 
1472 
1473  void
1475  const std::vector<unsigned char> &irregular_cells,
1476  const DynamicSparsityPattern & connectivity_cells,
1477  DynamicSparsityPattern & connectivity_blocks) const
1478  {
1479  std::vector<std::vector<unsigned int>> cell_blocks(n_blocks);
1480  std::vector<unsigned int> touched_cells(n_active_cells);
1481  unsigned int cell = 0;
1482  for (unsigned int i = 0, mcell = 0; i < n_blocks; ++i)
1483  {
1484  for (unsigned int c = 0;
1485  c < block_size && mcell < *(cell_partition_data.end() - 2);
1486  ++c, ++mcell)
1487  {
1488  unsigned int ncomp = (irregular_cells[mcell] > 0) ?
1489  irregular_cells[mcell] :
1490  vectorization_length;
1491  for (unsigned int c = 0; c < ncomp; ++c, ++cell)
1492  {
1493  cell_blocks[i].push_back(cell);
1494  touched_cells[cell] = i;
1495  }
1496  }
1497  }
1498  AssertDimension(cell, n_active_cells);
1499  for (unsigned int i = 0; i < cell_blocks.size(); ++i)
1500  for (unsigned int col = 0; col < cell_blocks[i].size(); ++col)
1501  {
1503  connectivity_cells.begin(cell_blocks[i][col]);
1504  it != connectivity_cells.end(cell_blocks[i][col]);
1505  ++it)
1506  {
1507  if (touched_cells[it->column()] != i)
1508  connectivity_blocks.add(i, touched_cells[it->column()]);
1509  }
1510  }
1511  }
1512 
1513 
1514 
1515  // Function to create partitioning on the second layer within each
1516  // partition. Version without preblocking.
1517  void
1519  const DynamicSparsityPattern & connectivity,
1520  const std::vector<unsigned int> &cell_active_fe_index,
1521  const unsigned int partition,
1522  const unsigned int cluster_size,
1523  const bool hp_bool,
1524  const std::vector<unsigned int> &cell_partition,
1525  const std::vector<unsigned int> &partition_list,
1526  const std::vector<unsigned int> &partition_size,
1527  std::vector<unsigned int> & partition_partition_list,
1528  std::vector<unsigned char> & irregular_cells)
1529  {
1530  const unsigned int n_macro_cells = *(cell_partition_data.end() - 2);
1531  const unsigned int n_ghost_slots =
1532  *(cell_partition_data.end() - 1) - n_macro_cells;
1533 
1534  // List of cells in previous partition
1535  std::vector<unsigned int> neighbor_list;
1536  // List of cells in current partition for use as neighbors in next
1537  // partition
1538  std::vector<unsigned int> neighbor_neighbor_list;
1539 
1540  std::vector<unsigned int> renumbering(n_active_cells);
1541 
1542  irregular_cells.back() = 0;
1543  irregular_cells.resize(n_active_cells + n_ghost_slots);
1544 
1545  unsigned int max_fe_index = 0;
1546  for (unsigned int i = 0; i < cell_active_fe_index.size(); ++i)
1547  max_fe_index = std::max(cell_active_fe_index[i], max_fe_index);
1548  Assert(!hp_bool || cell_active_fe_index.size() == n_active_cells,
1549  ExcInternalError());
1550 
1551  {
1552  unsigned int n_macro_cells_before = 0;
1553  // Create partitioning within partitions.
1554 
1555  // For each block of cells, this variable saves to which partitions
1556  // the block belongs. Initialize all to n_macro_cells to mark them as
1557  // not yet assigned a partition.
1558  std::vector<unsigned int> cell_partition_l2(
1559  n_active_cells, numbers::invalid_unsigned_int);
1560  partition_row_index.clear();
1561  partition_row_index.resize(partition + 1, 0);
1562  cell_partition_data.resize(1, 0);
1563 
1564  unsigned int counter = 0;
1565  unsigned int missing_macros;
1566  for (unsigned int part = 0; part < partition; ++part)
1567  {
1568  neighbor_neighbor_list.resize(0);
1569  neighbor_list.resize(0);
1570  bool work = true;
1571  unsigned int partition_l2 = 0;
1572  unsigned int start_up = partition_size[part];
1573  unsigned int partition_counter = 0;
1574  while (work)
1575  {
1576  if (neighbor_list.size() == 0)
1577  {
1578  work = false;
1579  partition_counter = 0;
1580  for (unsigned int j = start_up;
1581  j < partition_size[part + 1];
1582  ++j)
1583  if (cell_partition[partition_list[j]] == part &&
1584  cell_partition_l2[partition_list[j]] ==
1586  {
1587  start_up = j;
1588  work = true;
1589  partition_counter = 1;
1590  // To start up, set the start_up cell to partition
1591  // and list all its neighbors.
1592  AssertIndexRange(start_up, partition_size[part + 1]);
1593  cell_partition_l2[partition_list[start_up]] =
1594  partition_l2;
1595  neighbor_neighbor_list.push_back(
1596  partition_list[start_up]);
1597  partition_partition_list[counter++] =
1598  partition_list[start_up];
1599  start_up++;
1600  break;
1601  }
1602  }
1603  else
1604  {
1605  partition_counter = 0;
1606  for (unsigned int j = 0; j < neighbor_list.size(); ++j)
1607  {
1608  Assert(cell_partition[neighbor_list[j]] == part,
1609  ExcInternalError());
1610  Assert(cell_partition_l2[neighbor_list[j]] ==
1611  partition_l2 - 1,
1612  ExcInternalError());
1614  connectivity.begin(
1615  neighbor_list[j]),
1616  end = connectivity.end(
1617  neighbor_list[j]);
1618  for (; neighbor != end; ++neighbor)
1619  {
1620  if (cell_partition[neighbor->column()] == part &&
1621  cell_partition_l2[neighbor->column()] ==
1623  {
1624  cell_partition_l2[neighbor->column()] =
1625  partition_l2;
1626  neighbor_neighbor_list.push_back(
1627  neighbor->column());
1628  partition_partition_list[counter++] =
1629  neighbor->column();
1630  partition_counter++;
1631  }
1632  }
1633  }
1634  }
1635  if (partition_counter > 0)
1636  {
1637  int index_before = neighbor_neighbor_list.size(),
1638  index = index_before;
1639  {
1640  // put the cells into separate lists for each FE index
1641  // within one partition-partition
1642  missing_macros = 0;
1643  std::vector<unsigned int> remaining_per_macro_cell(
1644  max_fe_index + 1);
1645  std::vector<std::vector<unsigned int>>
1646  renumbering_fe_index;
1647  unsigned int cell;
1648  bool filled = true;
1649  if (hp_bool == true)
1650  {
1651  renumbering_fe_index.resize(max_fe_index + 1);
1652  for (cell = counter - partition_counter;
1653  cell < counter;
1654  ++cell)
1655  {
1656  renumbering_fe_index
1657  [cell_active_fe_index.empty() ?
1658  0 :
1659  cell_active_fe_index
1660  [partition_partition_list[cell]]]
1661  .push_back(partition_partition_list[cell]);
1662  }
1663  // check how many more cells are needed in the lists
1664  for (unsigned int j = 0; j < max_fe_index + 1; j++)
1665  {
1666  remaining_per_macro_cell[j] =
1667  renumbering_fe_index[j].size() %
1668  vectorization_length;
1669  if (remaining_per_macro_cell[j] != 0)
1670  filled = false;
1671  missing_macros +=
1672  ((renumbering_fe_index[j].size() +
1673  vectorization_length - 1) /
1674  vectorization_length);
1675  }
1676  }
1677  else
1678  {
1679  remaining_per_macro_cell.resize(1);
1680  remaining_per_macro_cell[0] =
1681  partition_counter % vectorization_length;
1682  missing_macros =
1683  partition_counter / vectorization_length;
1684  if (remaining_per_macro_cell[0] != 0)
1685  {
1686  filled = false;
1687  missing_macros++;
1688  }
1689  }
1690  missing_macros =
1691  cluster_size - (missing_macros % cluster_size);
1692 
1693  // now we realized that there are some cells missing.
1694  while (missing_macros > 0 || filled == false)
1695  {
1696  if (index == 0)
1697  {
1698  index = neighbor_neighbor_list.size();
1699  if (index == index_before)
1700  {
1701  if (missing_macros != 0)
1702  {
1703  neighbor_neighbor_list.resize(0);
1704  }
1705  start_up--;
1706  break; // not connected - start again
1707  }
1708  index_before = index;
1709  }
1710  index--;
1711  unsigned int additional =
1712  neighbor_neighbor_list[index];
1713 
1714  // go through the neighbors of the last cell in the
1715  // current partition and check if we find some to
1716  // fill up with.
1718  connectivity.begin(
1719  additional),
1720  end =
1721  connectivity.end(
1722  additional);
1723  for (; neighbor != end; ++neighbor)
1724  {
1725  if (cell_partition[neighbor->column()] == part &&
1726  cell_partition_l2[neighbor->column()] ==
1728  {
1729  unsigned int this_index = 0;
1730  if (hp_bool == true)
1731  this_index =
1732  cell_active_fe_index.empty() ?
1733  0 :
1734  cell_active_fe_index[neighbor
1735  ->column()];
1736 
1737  // Only add this cell if we need more macro
1738  // cells in the current block or if there is
1739  // a macro cell with the FE index that is
1740  // not yet fully populated
1741  if (missing_macros > 0 ||
1742  remaining_per_macro_cell[this_index] > 0)
1743  {
1744  cell_partition_l2[neighbor->column()] =
1745  partition_l2;
1746  neighbor_neighbor_list.push_back(
1747  neighbor->column());
1748  if (hp_bool == true)
1749  renumbering_fe_index[this_index]
1750  .push_back(neighbor->column());
1751  partition_partition_list[counter] =
1752  neighbor->column();
1753  counter++;
1754  partition_counter++;
1755  if (remaining_per_macro_cell
1756  [this_index] == 0 &&
1757  missing_macros > 0)
1758  missing_macros--;
1759  remaining_per_macro_cell[this_index]++;
1760  if (remaining_per_macro_cell
1761  [this_index] ==
1762  vectorization_length)
1763  {
1764  remaining_per_macro_cell[this_index] =
1765  0;
1766  }
1767  if (missing_macros == 0)
1768  {
1769  filled = true;
1770  for (unsigned int fe_ind = 0;
1771  fe_ind < max_fe_index + 1;
1772  ++fe_ind)
1773  if (remaining_per_macro_cell
1774  [fe_ind] != 0)
1775  filled = false;
1776  }
1777  if (filled == true)
1778  break;
1779  }
1780  }
1781  }
1782  }
1783  if (hp_bool == true)
1784  {
1785  // set the renumbering according to their active FE
1786  // index within one partition-partition which was
1787  // implicitly assumed above
1788  cell = counter - partition_counter;
1789  for (unsigned int j = 0; j < max_fe_index + 1; j++)
1790  {
1791  for (unsigned int jj = 0;
1792  jj < renumbering_fe_index[j].size();
1793  jj++)
1794  renumbering[cell++] =
1795  renumbering_fe_index[j][jj];
1796  if (renumbering_fe_index[j].size() %
1797  vectorization_length !=
1798  0)
1799  irregular_cells[renumbering_fe_index[j].size() /
1800  vectorization_length +
1801  n_macro_cells_before] =
1802  renumbering_fe_index[j].size() %
1803  vectorization_length;
1804  n_macro_cells_before +=
1805  (renumbering_fe_index[j].size() +
1806  vectorization_length - 1) /
1807  vectorization_length;
1808  renumbering_fe_index[j].resize(0);
1809  }
1810  }
1811  else
1812  {
1813  n_macro_cells_before +=
1814  partition_counter / vectorization_length;
1815  if (partition_counter % vectorization_length != 0)
1816  {
1817  irregular_cells[n_macro_cells_before] =
1818  partition_counter % vectorization_length;
1819  n_macro_cells_before++;
1820  }
1821  }
1822  }
1823  cell_partition_data.push_back(n_macro_cells_before);
1824  partition_l2++;
1825  }
1826  neighbor_list = neighbor_neighbor_list;
1827  neighbor_neighbor_list.resize(0);
1828  }
1829  partition_row_index[part + 1] =
1830  partition_row_index[part] + partition_l2;
1831  }
1832  }
1833  if (hp_bool == true)
1834  {
1835  partition_partition_list.swap(renumbering);
1836  }
1837  }
1838 
1839 
1840 
1841  // Function to create coloring on the second layer within each partition.
1842  // Version assumes preblocking.
1843  void
1845  const DynamicSparsityPattern & connectivity,
1846  const unsigned int partition,
1847  const std::vector<unsigned int> &cell_partition,
1848  const std::vector<unsigned int> &partition_list,
1849  const std::vector<unsigned int> &partition_size,
1850  std::vector<unsigned int> & partition_color_list)
1851  {
1852  const unsigned int n_macro_cells = *(cell_partition_data.end() - 2);
1853  std::vector<unsigned int> cell_color(n_blocks, n_macro_cells);
1854  std::vector<bool> color_finder;
1855 
1856  partition_row_index.resize(partition + 1);
1857  cell_partition_data.clear();
1858  unsigned int color_counter = 0, index_counter = 0;
1859  for (unsigned int part = 0; part < partition; part++)
1860  {
1861  partition_row_index[part] = index_counter;
1862  unsigned int max_color = 0;
1863  for (unsigned int k = partition_size[part];
1864  k < partition_size[part + 1];
1865  k++)
1866  {
1867  unsigned int cell = partition_list[k];
1868  unsigned int n_neighbors = connectivity.row_length(cell);
1869 
1870  // In the worst case, each neighbor has a different color. So we
1871  // find at least one available color between 0 and n_neighbors.
1872  color_finder.resize(n_neighbors + 1);
1873  for (unsigned int j = 0; j <= n_neighbors; ++j)
1874  color_finder[j] = true;
1876  connectivity.begin(cell),
1877  end = connectivity.end(cell);
1878  for (; neighbor != end; ++neighbor)
1879  {
1880  // Mark the color that a neighbor within the partition has
1881  // as taken
1882  if (cell_partition[neighbor->column()] == part &&
1883  cell_color[neighbor->column()] <= n_neighbors)
1884  color_finder[cell_color[neighbor->column()]] = false;
1885  }
1886  // Choose the smallest color that is not taken for the block
1887  cell_color[cell] = 0;
1888  while (color_finder[cell_color[cell]] == false)
1889  cell_color[cell]++;
1890  if (cell_color[cell] > max_color)
1891  max_color = cell_color[cell];
1892  }
1893  // Reorder within partition: First, all blocks that belong the 0 and
1894  // then so on until those with color max (Note that the smaller the
1895  // number the larger the partition)
1896  for (unsigned int color = 0; color <= max_color; color++)
1897  {
1898  cell_partition_data.push_back(color_counter);
1899  index_counter++;
1900  for (unsigned int k = partition_size[part];
1901  k < partition_size[part + 1];
1902  k++)
1903  {
1904  unsigned int cell = partition_list[k];
1905  if (cell_color[cell] == color)
1906  {
1907  partition_color_list[color_counter++] = cell;
1908  }
1909  }
1910  }
1911  }
1912  cell_partition_data.push_back(n_blocks);
1913  partition_row_index[partition] = index_counter;
1914  AssertDimension(color_counter, n_blocks);
1915  }
1916 
1917 
1918  // Function to create partitioning on the first layer.
1919  void
1921  const unsigned int cluster_size,
1922  std::vector<unsigned int> & cell_partition,
1923  std::vector<unsigned int> & partition_list,
1924  std::vector<unsigned int> & partition_size,
1925  unsigned int & partition) const
1926 
1927  {
1928  // For each block of cells, this variable saves to which partitions the
1929  // block belongs. Initialize all to n_macro_cells to mark them as not
1930  // yet assigned a partition.
1931  // std::vector<unsigned int> cell_partition (n_active_cells,
1932  // numbers::invalid_unsigned_int);
1933  // List of cells in previous partition
1934  std::vector<unsigned int> neighbor_list;
1935  // List of cells in current partition for use as neighbors in next
1936  // partition
1937  std::vector<unsigned int> neighbor_neighbor_list;
1938 
1939  // In element j of this variable, one puts the old number of the block
1940  // that should be the jth block in the new numeration.
1941  // std::vector<unsigned int> partition_list(n_active_cells,0);
1942 
1943  // This vector points to the start of each partition.
1944  // std::vector<unsigned int> partition_size(2,0);
1945 
1946  partition = 0;
1947  unsigned int counter = 0;
1948  unsigned int start_nonboundary =
1949  cell_partition_data.size() == 5 ?
1950  vectorization_length *
1951  (cell_partition_data[2] - cell_partition_data[1]) :
1952  0;
1953 
1954  const unsigned int n_macro_cells = *(cell_partition_data.end() - 2);
1955  if (n_macro_cells == 0)
1956  return;
1957  if (scheme == color)
1958  start_nonboundary = n_macro_cells;
1959  if (scheme == partition_color ||
1960  scheme == color) // blocking_connectivity == true
1961  start_nonboundary = ((start_nonboundary + block_size - 1) / block_size);
1962  unsigned int n_blocks;
1963  if (scheme == partition_color ||
1964  scheme == color) // blocking_connectivity == true
1965  n_blocks = this->n_blocks;
1966  else
1967  n_blocks = n_active_cells;
1968 
1969  if (start_nonboundary > n_blocks)
1970  start_nonboundary = n_blocks;
1971 
1972 
1973  unsigned int start_up = 0;
1974  bool work = true;
1975  unsigned int remainder = cluster_size;
1976 
1977  // this performs a classical breath-first search in the connectivity
1978  // graph of the cells under the restriction that the size of the
1979  // partitions should be a multiple of the given block size
1980  while (work)
1981  {
1982  // put the cells with neighbors on remote MPI processes up front
1983  if (start_nonboundary > 0)
1984  {
1985  for (unsigned int cell = 0; cell < start_nonboundary; ++cell)
1986  {
1987  const unsigned int cell_nn = cell;
1988  cell_partition[cell_nn] = partition;
1989  neighbor_list.push_back(cell_nn);
1990  partition_list[counter++] = cell_nn;
1991  partition_size.back()++;
1992  }
1993  start_nonboundary = 0;
1994  remainder -= (start_nonboundary % cluster_size);
1995  if (remainder == cluster_size)
1996  remainder = 0;
1997  }
1998  else
1999  {
2000  // To start up, set the start_up cell to partition and list all
2001  // its neighbors.
2002  cell_partition[start_up] = partition;
2003  neighbor_list.push_back(start_up);
2004  partition_list[counter++] = start_up;
2005  partition_size.back()++;
2006  start_up++;
2007  remainder--;
2008  if (remainder == cluster_size)
2009  remainder = 0;
2010  }
2011  int index_before = neighbor_list.size(), index = index_before,
2012  index_stop = 0;
2013  while (remainder > 0)
2014  {
2015  if (index == index_stop)
2016  {
2017  index = neighbor_list.size();
2018  if (index == index_before)
2019  {
2020  neighbor_list.resize(0);
2021  goto not_connect;
2022  }
2023  index_stop = index_before;
2024  index_before = index;
2025  }
2026  index--;
2027  unsigned int additional = neighbor_list[index];
2029  connectivity.begin(additional),
2030  end =
2031  connectivity.end(additional);
2032  for (; neighbor != end; ++neighbor)
2033  {
2034  if (cell_partition[neighbor->column()] ==
2036  {
2037  partition_size.back()++;
2038  cell_partition[neighbor->column()] = partition;
2039  neighbor_list.push_back(neighbor->column());
2040  partition_list[counter++] = neighbor->column();
2041  remainder--;
2042  if (remainder == 0)
2043  break;
2044  }
2045  }
2046  }
2047 
2048  while (neighbor_list.size() > 0)
2049  {
2050  partition++;
2051 
2052  // counter for number of cells so far in current partition
2053  unsigned int partition_counter = 0;
2054 
2055  // Mark the start of the new partition
2056  partition_size.push_back(partition_size.back());
2057 
2058  // Loop through the list of cells in previous partition and put
2059  // all their neighbors in current partition
2060  for (unsigned int j = 0; j < neighbor_list.size(); ++j)
2061  {
2062  Assert(cell_partition[neighbor_list[j]] == partition - 1,
2063  ExcInternalError());
2065  connectivity.begin(
2066  neighbor_list[j]),
2067  end = connectivity.end(
2068  neighbor_list[j]);
2069  for (; neighbor != end; ++neighbor)
2070  {
2071  if (cell_partition[neighbor->column()] ==
2073  {
2074  partition_size.back()++;
2075  cell_partition[neighbor->column()] = partition;
2076 
2077  // collect the cells of the current partition for
2078  // use as neighbors in next partition
2079  neighbor_neighbor_list.push_back(neighbor->column());
2080  partition_list[counter++] = neighbor->column();
2081  partition_counter++;
2082  }
2083  }
2084  }
2085  remainder = cluster_size - (partition_counter % cluster_size);
2086  if (remainder == cluster_size)
2087  remainder = 0;
2088  int index_stop = 0;
2089  int index_before = neighbor_neighbor_list.size(),
2090  index = index_before;
2091  while (remainder > 0)
2092  {
2093  if (index == index_stop)
2094  {
2095  index = neighbor_neighbor_list.size();
2096  if (index == index_before)
2097  {
2098  neighbor_neighbor_list.resize(0);
2099  break;
2100  }
2101  index_stop = index_before;
2102  index_before = index;
2103  }
2104  index--;
2105  unsigned int additional = neighbor_neighbor_list[index];
2107  connectivity.begin(
2108  additional),
2109  end = connectivity.end(
2110  additional);
2111  for (; neighbor != end; ++neighbor)
2112  {
2113  if (cell_partition[neighbor->column()] ==
2115  {
2116  partition_size.back()++;
2117  cell_partition[neighbor->column()] = partition;
2118  neighbor_neighbor_list.push_back(neighbor->column());
2119  partition_list[counter++] = neighbor->column();
2120  remainder--;
2121  if (remainder == 0)
2122  break;
2123  }
2124  }
2125  }
2126 
2127  neighbor_list = neighbor_neighbor_list;
2128  neighbor_neighbor_list.resize(0);
2129  }
2130  not_connect:
2131  // One has to check if the graph is not connected so we have to find
2132  // another partition.
2133  work = false;
2134  for (unsigned int j = start_up; j < n_blocks; ++j)
2135  if (cell_partition[j] == numbers::invalid_unsigned_int)
2136  {
2137  start_up = j;
2138  work = true;
2139  if (remainder == 0)
2140  remainder = cluster_size;
2141  break;
2142  }
2143  }
2144  if (remainder != 0)
2145  partition++;
2146 
2147  AssertDimension(partition_size[partition], n_blocks);
2148  }
2149 
2150 
2151  void
2152  TaskInfo::update_task_info(const unsigned int partition)
2153  {
2154  evens = (partition + 1) / 2;
2155  odds = partition / 2;
2156  n_blocked_workers = odds - (odds + evens + 1) % 2;
2157  n_workers = evens + odds - n_blocked_workers;
2158  // From here only used for partition partition option.
2159  partition_evens.resize(partition);
2160  partition_odds.resize(partition);
2161  partition_n_blocked_workers.resize(partition);
2162  partition_n_workers.resize(partition);
2163  for (unsigned int part = 0; part < partition; part++)
2164  {
2165  partition_evens[part] =
2166  (partition_row_index[part + 1] - partition_row_index[part] + 1) / 2;
2167  partition_odds[part] =
2168  (partition_row_index[part + 1] - partition_row_index[part]) / 2;
2169  partition_n_blocked_workers[part] =
2170  partition_odds[part] -
2171  (partition_odds[part] + partition_evens[part] + 1) % 2;
2172  partition_n_workers[part] = partition_evens[part] +
2173  partition_odds[part] -
2174  partition_n_blocked_workers[part];
2175  }
2176  }
2177  } // namespace MatrixFreeFunctions
2178 } // namespace internal
2179 
2180 
2181 
2182 // explicit instantiations of template functions
2183 template void
2184 internal::MatrixFreeFunctions::TaskInfo::print_memory_statistics<std::ostream>(
2185  std::ostream &,
2186  const std::size_t) const;
2187 template void
2189  ConditionalOStream>(ConditionalOStream &, const std::size_t) const;
2190 
2191 
2192 DEAL_II_NAMESPACE_CLOSE
size_type row_length(const size_type row) const
static const unsigned int invalid_unsigned_int
Definition: types.h:173
#define AssertDimension(dim1, dim2)
Definition: exceptions.h:1366
void print_memory_statistics(StreamType &out, std::size_t data_length) const
Definition: task_info.cc:662
virtual void cell(const std::pair< unsigned int, unsigned int > &cell_range)=0
void add(const size_type i, const size_type j)
void guess_block_size(const unsigned int dofs_per_cell)
Definition: task_info.cc:1037
#define AssertIndexRange(index, range)
Definition: exceptions.h:1407
std::size_t memory_consumption() const
Definition: task_info.cc:677
virtual void vector_update_ghosts_finish()=0
Finishes the communication for the update ghost values operation.
#define AssertThrow(cond, exc)
Definition: exceptions.h:1329
void make_partitioning_within_partitions_post_blocked(const DynamicSparsityPattern &connectivity, const std::vector< unsigned int > &cell_active_fe_index, const unsigned int partition, const unsigned int cluster_size, const bool hp_bool, const std::vector< unsigned int > &cell_partition, const std::vector< unsigned int > &partition_list, const std::vector< unsigned int > &partition_size, std::vector< unsigned int > &partition_partition_list, std::vector< unsigned char > &irregular_cells)
Definition: task_info.cc:1518
void make_coloring_within_partitions_pre_blocked(const DynamicSparsityPattern &connectivity, const unsigned int partition, const std::vector< unsigned int > &cell_partition, const std::vector< unsigned int > &partition_list, const std::vector< unsigned int > &partition_size, std::vector< unsigned int > &partition_color_list)
Definition: task_info.cc:1844
void loop(MFWorkerInterface &worker) const
Definition: task_info.cc:337
void partition(const SparsityPattern &sparsity_pattern, const unsigned int n_partitions, std::vector< unsigned int > &partition_indices, const Partitioner partitioner=Partitioner::metis)
#define Assert(cond, exc)
Definition: exceptions.h:1227
void make_thread_graph_partition_color(DynamicSparsityPattern &connectivity, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &irregular_cells, const bool hp_bool)
Definition: task_info.cc:1064
void update_task_info(const unsigned int partition)
Definition: task_info.cc:2152
void initial_setup_blocks_tasks(const std::vector< unsigned int > &boundary_cells, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &incompletely_filled_vectorization)
Definition: task_info.cc:971
virtual void zero_dst_vector_range(const unsigned int range_index)=0
virtual void boundary(const std::pair< unsigned int, unsigned int > &face_range)=0
void create_blocks_serial(const std::vector< unsigned int > &boundary_cells, const std::vector< unsigned int > &cells_close_to_boundary, const unsigned int dofs_per_cell, const std::vector< unsigned int > &cell_vectorization_categories, const bool cell_vectorization_categories_strict, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &incompletely_filled_vectorization)
Definition: task_info.cc:773
std::vector< unsigned int > invert_permutation(const std::vector< unsigned int > &permutation)
Definition: utilities.cc:503
void make_connectivity_cells_to_blocks(const std::vector< unsigned char > &irregular_cells, const DynamicSparsityPattern &connectivity_cells, DynamicSparsityPattern &connectivity_blocks) const
Definition: task_info.cc:1474
virtual void vector_update_ghosts_start()=0
Starts the communication for the update ghost values operation.
virtual void face(const std::pair< unsigned int, unsigned int > &face_range)=0
void collect_boundary_cells(const unsigned int n_active_cells, const unsigned int n_active_and_ghost_cells, const unsigned int vectorization_length, std::vector< unsigned int > &boundary_cells)
Definition: task_info.cc:694
void make_thread_graph_partition_partition(const std::vector< unsigned int > &cell_active_fe_index, DynamicSparsityPattern &connectivity, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &irregular_cells, const bool hp_bool)
Definition: task_info.cc:1406
virtual void vector_compress_start()=0
Starts the communication for the vector compress operation.
void make_partitioning(const DynamicSparsityPattern &connectivity, const unsigned int cluster_size, std::vector< unsigned int > &cell_partition, std::vector< unsigned int > &partition_list, std::vector< unsigned int > &partition_size, unsigned int &partition) const
Definition: task_info.cc:1920
static::ExceptionBase & ExcNotImplemented()
static unsigned int n_threads()
MinMaxAvg min_max_avg(const double my_value, const MPI_Comm &mpi_communicator)
Definition: mpi.cc:341
std::enable_if< std::is_fundamental< T >::value, std::size_t >::type memory_consumption(const T &t)
static::ExceptionBase & ExcInternalError()
void make_thread_graph(const std::vector< unsigned int > &cell_active_fe_index, DynamicSparsityPattern &connectivity, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &irregular_cells, const bool hp_bool)
Definition: task_info.cc:1211
virtual void vector_compress_finish()=0
Finishes the communication for the vector compress operation.