17 #include <deal.II/base/conditional_ostream.h> 18 #include <deal.II/base/memory_consumption.h> 19 #include <deal.II/base/mpi.h> 20 #include <deal.II/base/multithread_info.h> 21 #include <deal.II/base/parallel.h> 22 #include <deal.II/base/utilities.h> 24 #include <deal.II/matrix_free/task_info.h> 27 #ifdef DEAL_II_WITH_THREADS 28 # include <tbb/blocked_range.h> 29 # include <tbb/parallel_for.h> 30 # include <tbb/task.h> 31 # include <tbb/task_scheduler_init.h> 37 DEAL_II_NAMESPACE_OPEN
44 namespace MatrixFreeFunctions
// Work item for one partition: runs the cell work and, when face data exists,
// the face/boundary work on the index range of that partition.
// NOTE(review): the struct header and several lines of this definition are
// missing from this listing; the comments describe the visible code only.
//
// Constructor taking the address of a worker pointer; that pointer is only
// dereferenced when `used_worker` is selected further down.
46 #ifdef DEAL_II_WITH_THREADS 56 ActualCellWork(MFWorkerInterface **worker_pointer,
57 const unsigned int partition,
58 const TaskInfo & task_info)
60 , worker_pointer(worker_pointer)
61 , partition(partition)
62 , task_info(task_info)
// Constructor taking the worker directly; the indirect pointer is nulled.
65 ActualCellWork(MFWorkerInterface &worker,
66 const unsigned int partition,
67 const TaskInfo & task_info)
69 , worker_pointer(nullptr)
70 , partition(partition)
71 , task_info(task_info)
// Prefer the directly stored worker; otherwise go through worker_pointer.
77 MFWorkerInterface *used_worker =
78 worker !=
nullptr ? worker : *worker_pointer;
// The range handed to the worker is given by entries `partition` and
// `partition + 1` of the cell partition table.
81 std::make_pair(task_info.cell_partition_data[partition],
82 task_info.cell_partition_data[partition + 1]));
// Face tables are only consulted when face partition data is non-empty.
84 if (task_info.face_partition_data.empty() ==
false)
87 std::make_pair(task_info.face_partition_data[partition],
88 task_info.face_partition_data[partition + 1]));
// NOTE(review): presumably still inside the conditional above -- the brace
// lines are not visible here; confirm against the full file.
90 used_worker->boundary(std::make_pair(
91 task_info.boundary_partition_data[partition],
92 task_info.boundary_partition_data[partition + 1]));
// Direct worker and indirect worker pointer (see the `used_worker` selection).
97 MFWorkerInterface * worker;
98 MFWorkerInterface **worker_pointer;
100 const TaskInfo & task_info;
// TBB task that carries a `work` member built from (worker, partition,
// task_info) and an `is_blocked` flag.
// NOTE(review): the execute() signature and the call that runs `work` are not
// visible in this listing.
103 class CellWork :
public tbb::task
106 CellWork(MFWorkerInterface &worker,
107 const unsigned int partition,
108 const TaskInfo & task_info,
109 const bool is_blocked)
111 , work(worker, partition, task_info)
112 , is_blocked(is_blocked)
// A blocked task spawns its `dummy` task; `dummy` is assigned from outside
// this class before the task runs.
120 if (is_blocked ==
true)
121 dummy->spawn(*dummy);
// Placeholder task; only used when is_blocked is true.
125 tbb::empty_task *dummy;
129 const bool is_blocked;
// TBB task that processes one partition by building a nested tree of CellWork
// tasks under a private root, waiting for it, and then (if blocked) spawning
// its own `dummy` task.
// NOTE(review): branch conditions, the CellWork constructor calls and all
// brace lines are missing from this listing; comments cover visible code only.
134 class PartitionWork :
public tbb::task
137 PartitionWork(MFWorkerInterface &function_in,
138 const unsigned int partition_in,
139 const TaskInfo & task_info_in,
140 const bool is_blocked_in =
false)
142 , function(function_in)
144 , task_info(task_info_in)
145 , is_blocked(is_blocked_in)
// Private root task for the sub-tree of this partition.
151 tbb::empty_task *root =
152 new (tbb::task::allocate_root()) tbb::empty_task;
// Per-partition counts precomputed in task_info.
153 const unsigned int evens = task_info.partition_evens[
partition];
154 const unsigned int odds = task_info.partition_odds[
partition];
155 const unsigned int n_blocked_workers =
156 task_info.partition_n_blocked_workers[
partition];
157 const unsigned int n_workers =
158 task_info.partition_n_workers[
partition];
159 std::vector<CellWork *> worker(n_workers);
160 std::vector<CellWork *> blocked_worker(n_blocked_workers);
// The root waits for `evens` children plus one for wait_for_all().
162 root->set_ref_count(evens + 1);
163 for (
unsigned int j = 0; j < evens; j++)
// Even-indexed slice 2*j becomes a child of the root.
165 worker[j] =
new (root->allocate_child())
167 task_info.partition_row_index[partition] + 2 * j,
// Branch using j - 1 (presumably guarded by j > 0 -- condition not visible):
// the previous blocked worker gets a dummy child of worker[j] and is spawned.
172 worker[j]->set_ref_count(2);
173 blocked_worker[j - 1]->dummy =
174 new (worker[j]->allocate_child()) tbb::empty_task;
175 worker[j - 1]->spawn(*blocked_worker[j - 1]);
178 worker[j]->set_ref_count(1);
// Blocked worker for the following slice, allocated as a child of worker[j].
181 blocked_worker[j] =
new (worker[j]->allocate_child())
183 task_info.partition_row_index[partition] + 2 * j +
// Trailing worker stored at index `evens` and spawned from worker[j].
192 worker[evens] =
new (worker[j]->allocate_child())
194 task_info.partition_row_index[partition] +
198 worker[j]->spawn(*worker[evens]);
// Otherwise an empty child is spawned from worker[j].
202 tbb::empty_task *child =
203 new (worker[j]->allocate_child()) tbb::empty_task();
204 worker[j]->spawn(*child);
// Block until the whole sub-tree has run, then free the root explicitly.
209 root->wait_for_all();
210 root->destroy(*root);
211 if (is_blocked ==
true)
212 dummy->spawn(*dummy);
// Assigned from outside before the task runs; used only when is_blocked.
216 tbb::empty_task *dummy;
219 MFWorkerInterface &
function;
221 const TaskInfo & task_info;
222 const bool is_blocked;
// Functor applied to a tbb::blocked_range of chunk indices: converts the
// chunk range into an index range within one partition and runs the worker.
// NOTE(review): the class header, some initializers and the face/boundary
// part of the call operator are missing from this listing.
234 CellWork(MFWorkerInterface &worker_in,
235 const TaskInfo & task_info_in,
236 const unsigned int partition_in)
238 , task_info(task_info_in)
243 operator()(
const tbb::blocked_range<unsigned int> &r)
// First index: partition start plus block_size entries per preceding chunk.
const 245 const unsigned int start_index =
246 task_info.cell_partition_data[
partition] +
247 task_info.block_size * r.begin();
// Last chunk may be shorter, so clamp to the start of the next partition.
248 const unsigned int end_index =
249 std::min(start_index + task_info.block_size * (r.end() - r.begin()),
250 task_info.cell_partition_data[partition + 1]);
251 worker.cell(std::make_pair(start_index, end_index));
// Body of this conditional is not visible here.
253 if (task_info.face_partition_data.empty() ==
false)
260 MFWorkerInterface &worker;
261 const TaskInfo & task_info;
// TBB task that runs one partition through parallel_for over chunks of
// task_info.block_size entries, then (if blocked) spawns its `dummy` task.
// NOTE(review): some initializers and the execute() signature are missing
// from this listing.
267 class PartitionWork :
public tbb::task
270 PartitionWork(MFWorkerInterface &worker_in,
271 const unsigned int partition_in,
272 const TaskInfo & task_info_in,
273 const bool is_blocked_in)
277 , task_info(task_info_in)
278 , is_blocked(is_blocked_in)
// Number of chunks for this partition. Presumably a rounding-up division by
// block_size -- the line carrying the `- 1) /` part is not visible; confirm.
284 const unsigned int n_chunks =
285 (task_info.cell_partition_data[
partition + 1] -
286 task_info.cell_partition_data[
partition] + task_info.block_size -
288 task_info.block_size;
// Grain size 1: every chunk index may become its own unit of work.
289 parallel_for(tbb::blocked_range<unsigned int>(0, n_chunks, 1),
290 CellWork(worker, task_info, partition));
291 if (is_blocked ==
true)
292 dummy->spawn(*dummy);
// Assigned from outside before the task runs; used only when is_blocked.
296 tbb::empty_task *dummy;
299 MFWorkerInterface &worker;
301 const TaskInfo & task_info;
302 const bool is_blocked;
// TBB task wrapping one vector communication call on the worker; the
// `do_compress` flag selects which call is made.
// NOTE(review): the execute() signature, one initializer and the line between
// the two calls below (presumably `else`) are missing from this listing.
309 class MPICommunication :
public tbb::task
312 MPICommunication(MFWorkerInterface &worker_in,
const bool do_compress)
314 , do_compress(do_compress)
// do_compress == false: finish the ghost update.
320 if (do_compress ==
false)
321 worker.vector_update_ghosts_finish();
// Other case: start the compress operation.
323 worker.vector_compress_start();
328 MFWorkerInterface &worker;
329 const bool do_compress;
// Drives the worker `funct` over all partitions. Four code paths are visible:
// a task tree of partition::PartitionWork, a task tree of
// color::PartitionWork, a per-color root/spawn/wait loop, and a plain serial
// loop calling funct.cell / funct.face / boundary work directly.
// NOTE(review): the function signature (presumably TaskInfo::loop), most
// branch conditions and all brace lines are missing from this listing, so the
// nesting of the statements below cannot be confirmed here.
332 #endif // DEAL_II_WITH_THREADS 341 #ifdef DEAL_II_WITH_THREADS 346 if (scheme == partition_partition)
// --- Path 1: tree of partition::PartitionWork tasks ---
348 tbb::empty_task *root =
349 new (tbb::task::allocate_root()) tbb::empty_task;
350 root->set_ref_count(evens + 1);
351 std::vector<partition::PartitionWork *> worker(n_workers);
352 std::vector<partition::PartitionWork *> blocked_worker(
// Communication task created with flag `true` (compress start), as a child
// of the root.
354 MPICommunication *worker_compr =
355 new (root->allocate_child()) MPICommunication(funct,
true);
356 worker_compr->set_ref_count(1);
357 for (
unsigned int j = 0; j < evens; j++)
// Variant A: partition 2*j as a child of the root; the previous blocked
// worker receives its dummy and is spawned (from worker[j - 1] or from
// worker_compr -- the selecting conditions are not visible).
361 worker[j] =
new (root->allocate_child())
362 partition::PartitionWork(funct, 2 * j, *
this,
false);
363 worker[j]->set_ref_count(2);
364 blocked_worker[j - 1]->dummy =
365 new (worker[j]->allocate_child()) tbb::empty_task;
367 worker[j - 1]->spawn(*blocked_worker[j - 1]);
369 worker_compr->spawn(*blocked_worker[j - 1]);
// Variant B: partition 2*j as a child of worker_compr, together with a
// communication task created with flag `false` that is spawned right away.
373 worker[j] =
new (worker_compr->allocate_child())
374 partition::PartitionWork(funct, 2 * j, *
this,
false);
375 worker[j]->set_ref_count(2);
376 MPICommunication *worker_dist =
377 new (worker[j]->allocate_child())
378 MPICommunication(funct,
false);
379 worker_dist->spawn(*worker_dist);
// Odd partition 2*j + 1 is created as a blocked task under worker[j].
383 blocked_worker[j] =
new (worker[j]->allocate_child())
384 partition::PartitionWork(funct, 2 * j + 1, *
this,
true);
390 worker[evens] =
new (worker[j]->allocate_child())
391 partition::PartitionWork(funct,
395 worker[j]->spawn(*worker[evens]);
399 tbb::empty_task *child =
400 new (worker[j]->allocate_child()) tbb::empty_task();
401 worker[j]->spawn(*child);
// Wait for the whole tree, then free the root explicitly.
406 root->wait_for_all();
407 root->destroy(*root);
// --- Path 2: tree of color::PartitionWork tasks (condition not visible) ---
415 tbb::empty_task *root =
416 new (tbb::task::allocate_root()) tbb::empty_task;
417 root->set_ref_count(evens + 1);
418 const unsigned int n_blocked_workers =
419 odds - (odds + evens + 1) % 2;
420 const unsigned int n_workers =
421 cell_partition_data.size() - 1 - n_blocked_workers;
422 std::vector<color::PartitionWork *> worker(n_workers);
423 std::vector<color::PartitionWork *> blocked_worker(
425 unsigned int worker_index = 0, slice_index = 0;
426 unsigned int spawn_index = 0;
// spawn_index_child is used as a small state flag below: -2 after a reset,
// -1 when a blocked worker is to be spawned, >= 0 for a worker index.
427 int spawn_index_child = -2;
428 MPICommunication *worker_compr =
429 new (root->allocate_child()) MPICommunication(funct,
true);
430 worker_compr->set_ref_count(1);
431 for (
unsigned int part = 0;
432 part < partition_row_index.size() - 1;
435 const unsigned int spawn_index_new = worker_index;
437 worker[worker_index] =
438 new (worker_compr->allocate_child())
439 color::PartitionWork(funct,
444 worker[worker_index] =
new (root->allocate_child())
445 color::PartitionWork(funct,
// Chain the remaining slices of this row: each new worker is a child of the
// previous one.
450 for (; slice_index < partition_row_index[part + 1];
453 worker[worker_index]->set_ref_count(1);
455 worker[worker_index] =
456 new (worker[worker_index - 1]->allocate_child())
457 color::PartitionWork(funct,
462 worker[worker_index]->set_ref_count(2);
465 blocked_worker[(part - 1) / 2]->dummy =
466 new (worker[worker_index]->allocate_child())
469 if (spawn_index_child == -1)
470 worker[spawn_index]->spawn(
471 *blocked_worker[(part - 1) / 2]);
474 Assert(spawn_index_child >= 0,
476 worker[spawn_index]->spawn(
477 *worker[spawn_index_child]);
479 spawn_index = spawn_index_new;
480 spawn_index_child = -2;
484 MPICommunication *worker_dist =
485 new (worker[worker_index]->allocate_child())
486 MPICommunication(funct,
false);
487 worker_dist->spawn(*worker_dist);
491 if (part < partition_row_index.size() - 1)
493 if (part < partition_row_index.size() - 2)
495 blocked_worker[part / 2] =
496 new (worker[worker_index - 1]->allocate_child())
497 color::PartitionWork(funct,
502 if (slice_index < partition_row_index[part + 1])
504 blocked_worker[part / 2]->set_ref_count(1);
505 worker[worker_index] =
new (
506 blocked_worker[part / 2]->allocate_child())
507 color::PartitionWork(funct,
515 spawn_index_child = -1;
519 for (; slice_index < partition_row_index[part + 1];
522 if (slice_index > partition_row_index[part])
524 worker[worker_index]->set_ref_count(1);
527 worker[worker_index] =
528 new (worker[worker_index - 1]->allocate_child())
529 color::PartitionWork(funct,
534 spawn_index_child = worker_index;
// Terminating empty task under the last worker that was created.
539 tbb::empty_task *
final =
540 new (worker[worker_index - 1]->allocate_child())
542 worker[spawn_index]->spawn(*
final);
543 spawn_index_child = worker_index - 1;
549 worker[spawn_index]->spawn(*worker[spawn_index_child]);
551 root->wait_for_all();
552 root->destroy(*root);
// --- Path 3: one root per color; spawn a single task and wait for it ---
562 for (
unsigned int color = 0; color < partition_row_index[1];
565 tbb::empty_task *root =
566 new (tbb::task::allocate_root()) tbb::empty_task;
// Ref count 2 = one child plus one for wait_for_all().
567 root->set_ref_count(2);
568 color::PartitionWork *worker =
569 new (root->allocate_child())
570 color::PartitionWork(funct, color, *
this,
false);
571 root->spawn(*worker);
572 root->wait_for_all();
573 root->destroy(*root);
// --- Path 4: serial loop over partition rows and their index ranges ---
585 for (
unsigned int part = 0; part < partition_row_index.size() - 2;
591 for (
unsigned int i = partition_row_index[part];
592 i < partition_row_index[part + 1];
// Empty ranges are skipped.
596 if (cell_partition_data[i + 1] > cell_partition_data[i])
599 funct.
cell(std::make_pair(cell_partition_data[i],
600 cell_partition_data[i + 1]));
603 if (face_partition_data.empty() ==
false)
605 if (face_partition_data[i + 1] > face_partition_data[i])
606 funct.
face(std::make_pair(face_partition_data[i],
607 face_partition_data[i + 1]));
608 if (boundary_partition_data[i + 1] >
609 boundary_partition_data[i])
611 std::make_pair(boundary_partition_data[i],
612 boundary_partition_data[i + 1]));
// Resets the task bookkeeping: vectorization length back to 1, all partition
// tables emptied, blocked-worker count zeroed and the communicator set to
// MPI_COMM_SELF.
// NOTE(review): the signature (presumably TaskInfo::clear) and several
// further assignments are missing from this listing.
637 vectorization_length = 1;
641 partition_row_index.clear();
642 cell_partition_data.clear();
643 face_partition_data.clear();
644 boundary_partition_data.clear();
647 n_blocked_workers = 0;
649 partition_evens.clear();
650 partition_odds.clear();
651 partition_n_blocked_workers.clear();
652 partition_n_workers.clear();
653 communicator = MPI_COMM_SELF;
// Writes the min/avg/max fields of `memory_c`, separated by "/", followed by
// " MB" and a line end to the stream `out`.
// NOTE(review): the function name, the `out` parameter and the computation of
// `memory_c` from `data_length` are not visible in this listing.
660 template <
typename StreamType>
663 const std::size_t data_length)
const 670 out << memory_c.
min <<
"/" << memory_c.
avg <<
"/" << memory_c.
max;
// std::endl also flushes the stream.
671 out <<
" MB" << std::endl;
// Stores the cell counts and vectorization length, then pads `boundary_cells`
// with additional cell indices so that its size becomes a multiple of
// vectorization_length (unless it already covers all active cells), and
// finally sorts it.
// NOTE(review): the function name line, brace lines and a few statements are
// missing from this listing.
695 const unsigned int n_active_cells_in,
696 const unsigned int n_active_and_ghost_cells,
697 const unsigned int vectorization_length_in,
698 std::vector<unsigned int> &boundary_cells)
700 vectorization_length = vectorization_length_in;
701 n_active_cells = n_active_cells_in;
702 n_ghost_cells = n_active_and_ghost_cells - n_active_cells;
// Number of entries missing to reach the next multiple of the vector length
// (0 when the size is already a multiple).
706 unsigned int fillup_needed =
707 (vectorization_length - boundary_cells.size() % vectorization_length) %
708 vectorization_length;
709 if (fillup_needed > 0 && boundary_cells.size() < n_active_cells)
714 std::vector<unsigned int> new_boundary_cells;
715 new_boundary_cells.reserve(boundary_cells.size());
// Walk the existing list and take filler indices from the gaps in front of
// each entry; the index arithmetic relies on ascending order of the input.
717 unsigned int next_free_slot = 0, bound_index = 0;
718 while (fillup_needed > 0 && bound_index < boundary_cells.size())
720 if (next_free_slot < boundary_cells[bound_index])
// Gap is large enough: take the `fillup_needed` indices directly before the
// current entry.
724 if (next_free_slot + fillup_needed <=
725 boundary_cells[bound_index])
727 for (
unsigned int j =
728 boundary_cells[bound_index] - fillup_needed;
729 j < boundary_cells[bound_index];
731 new_boundary_cells.push_back(j);
// Gap is too small: take every index in the gap.
738 for (
unsigned int j = next_free_slot;
739 j < boundary_cells[bound_index];
741 new_boundary_cells.push_back(j);
743 boundary_cells[bound_index] - next_free_slot;
746 new_boundary_cells.push_back(boundary_cells[bound_index]);
747 next_free_slot = boundary_cells[bound_index] + 1;
// NOTE(review): this loop body is a single push_back and never changes
// fillup_needed, so once entered it runs until back() reaches
// n_active_cells - 1 rather than stopping after `fillup_needed` entries.
// The `size() == 0` arm would also call back() on an empty vector; that looks
// unreachable because the loop above appends at least one entry whenever
// fillup_needed > 0, but confirm.
750 while (fillup_needed > 0 &&
751 (new_boundary_cells.size() == 0 ||
752 new_boundary_cells.back() < n_active_cells - 1))
753 new_boundary_cells.push_back(new_boundary_cells.back() + 1);
// Copy whatever is left of the original list.
754 while (bound_index < boundary_cells.size())
755 new_boundary_cells.push_back(boundary_cells[bound_index++]);
757 boundary_cells.swap(new_boundary_cells);
761 std::sort(boundary_cells.begin(), boundary_cells.end());
// Expected result: size divisible by the vector length, or all active cells.
765 Assert(boundary_cells.size() % vectorization_length == 0 ||
766 boundary_cells.size() == n_active_cells,
// Assigns every cell a block mark (1..4), groups the cells of each block by a
// dense category index, writes them to `renumbering` in that order and builds
// cell_partition_data / partition_row_index /
// incompletely_filled_vectorization along the way.
// NOTE(review): the function name line, several conditions, loop bodies and
// all brace lines are missing from this listing; comments cover visible code.
774 const std::vector<unsigned int> &boundary_cells,
775 const std::vector<unsigned int> &cells_close_to_boundary,
776 const unsigned int dofs_per_cell,
777 const std::vector<unsigned int> &cell_vectorization_categories,
778 const bool cell_vectorization_categories_strict,
779 std::vector<unsigned int> & renumbering,
780 std::vector<unsigned char> & incompletely_filled_vectorization)
// Rounded-up number of vector batches for active and for ghost cells.
782 const unsigned int n_macro_cells =
783 (n_active_cells + vectorization_length - 1) / vectorization_length;
784 const unsigned int n_ghost_slots =
785 (n_ghost_cells + vectorization_length - 1) / vectorization_length;
786 const unsigned int n_boundary_cells = boundary_cells.size();
788 incompletely_filled_vectorization.resize(n_macro_cells + n_ghost_slots);
789 renumbering.resize(n_active_cells + n_ghost_cells,
// 3 or 5 row entries; the selecting condition is not visible (the block loop
// further down uses n_procs > 1 to choose between 5 and 3).
795 partition_row_index.resize(3);
797 partition_row_index.resize(5);
// Mark per cell, 0 = not yet assigned.
800 std::vector<unsigned char> cell_marked(n_active_cells + n_ghost_cells, 0);
// Boundary cells get mark 2.
803 for (
unsigned int i = 0; i < n_boundary_cells; ++i)
804 cell_marked[boundary_cells[i]] = 2;
806 Assert(boundary_cells.size() % vectorization_length == 0 ||
807 boundary_cells.size() == n_active_cells,
// Half of the non-boundary active cells, rounded down to a multiple of the
// vector length.
810 const unsigned int n_second_slot =
811 ((n_active_cells - n_boundary_cells) / 2 / vectorization_length) *
812 vectorization_length;
813 unsigned int count = 0;
// Unmarked cells close to the boundary get mark 1 while count is below
// n_second_slot, mark 3 afterwards.
814 for (
unsigned int i = 0; i < cells_close_to_boundary.size(); ++i)
815 if (cell_marked[cells_close_to_boundary[i]] == 0)
817 cell_marked[cells_close_to_boundary[i]] =
818 count < n_second_slot ? 1 : 3;
// Three sweeps over the still unmarked cells (assigned marks not visible).
823 for (; c < n_active_cells && count < n_second_slot; ++c)
824 if (cell_marked[c] == 0)
829 for (; c < n_active_cells; ++c)
830 if (cell_marked[c] == 0)
832 for (; c < n_active_cells + n_ghost_cells; ++c)
833 if (cell_marked[c] == 0)
// Alternative path: everything in block 1 (condition not visible).
837 std::fill(cell_marked.begin(), cell_marked.end(), 1);
839 for (
unsigned int i = 0; i < cell_marked.size(); ++i)
842 unsigned int n_categories = 1;
843 std::vector<unsigned int> tight_category_map;
// User-provided categories: map them to a dense 0..n-1 numbering.
844 if (cell_vectorization_categories.empty() ==
false)
847 n_active_cells + n_ghost_cells);
852 tight_category_map.resize(n_active_cells + n_ghost_cells);
853 std::set<unsigned int> used_categories;
854 for (
unsigned int i = 0; i < n_active_cells + n_ghost_cells; ++i)
855 used_categories.insert(cell_vectorization_categories[i]);
856 std::vector<unsigned int> used_categories_vector(
857 used_categories.size());
// NOTE(review): n_categories was initialized to 1 above; it is presumably
// reset to 0 on a line not visible here, otherwise the indexing below would
// run one past the end -- confirm.
859 for (
auto &it : used_categories)
860 used_categories_vector[n_categories++] = it;
// std::set iterates in ascending order, so lower_bound finds the dense index.
861 for (
unsigned int i = 0; i < n_active_cells + n_ghost_cells; ++i)
863 const unsigned int index =
864 std::lower_bound(used_categories_vector.begin(),
865 used_categories_vector.end(),
866 cell_vectorization_categories[i]) -
867 used_categories_vector.begin();
869 tight_category_map[i] = index;
// Extra room for partially filled batches: 4 per category.
873 incompletely_filled_vectorization.resize(
874 incompletely_filled_vectorization.size() + 4 * n_categories);
// No categories and no near-boundary cells: single category 0.
876 else if (cells_close_to_boundary.empty())
877 tight_category_map.resize(n_active_cells + n_ghost_cells, 0);
// Otherwise: category 0 for near-boundary cells, 1 for all others.
881 tight_category_map.resize(n_active_cells + n_ghost_cells, 1);
882 for (
unsigned int i = 0; i < cells_close_to_boundary.size(); ++i)
883 tight_category_map[cells_close_to_boundary[i]] = 0;
886 cell_partition_data.clear();
887 cell_partition_data.resize(1, 0);
888 unsigned int counter = 0;
889 unsigned int n_cells = 0;
890 std::vector<std::vector<unsigned int>> renumbering_category(n_categories);
// Blocks 1..4 with more than one process, 1..2 otherwise.
891 for (
unsigned int block = 1; block < (n_procs > 1u ? 5u : 3u); ++block)
// Bucket the cells of this block by category.
894 for (
unsigned int i = 0; i < n_active_cells + n_ghost_cells; ++i)
895 if (cell_marked[i] == block)
896 renumbering_category[tight_category_map[i]].push_back(i);
// Non-strict mode: fill incomplete batches of category j with cells taken
// from the back of lower categories.
900 if (cell_vectorization_categories_strict ==
false && n_categories > 1)
901 for (
unsigned int j = n_categories - 1; j > 0; --j)
903 unsigned int lower_index = j - 1;
904 while (renumbering_category[j].size() % vectorization_length)
906 while (renumbering_category[j].size() %
907 vectorization_length &&
908 !renumbering_category[lower_index].empty())
910 renumbering_category[j].push_back(
911 renumbering_category[lower_index].back());
912 renumbering_category[lower_index].pop_back();
914 if (lower_index == 0)
// Emit the cells category by category and record partial batches.
922 for (
unsigned int j = 0; j < n_categories; ++j)
924 for (
unsigned int jj = 0; jj < renumbering_category[j].size();
926 renumbering[counter++] = renumbering_category[j][jj];
927 unsigned int remainder =
928 renumbering_category[j].size() % vectorization_length;
930 incompletely_filled_vectorization
931 [renumbering_category[j].size() / vectorization_length +
932 n_cells] = remainder;
933 const unsigned int n_my_macro_cells =
934 (renumbering_category[j].size() + vectorization_length - 1) /
935 vectorization_length;
936 renumbering_category[j].clear();
// NOTE(review): divides by dofs_per_cell; the caller must guarantee it is
// non-zero. The result is at least 2.
940 const unsigned int block_size =
941 std::max((2048U / dofs_per_cell) / 8 * 4, 2U);
943 for (
unsigned int k = 0; k < n_my_macro_cells; k += block_size)
944 cell_partition_data.push_back(
945 n_cells + std::min(k + block_size, n_my_macro_cells));
947 cell_partition_data.back() += n_my_macro_cells;
948 n_cells += n_my_macro_cells;
950 partition_row_index[block] = cell_partition_data.size() - 1;
951 if (block == 3 || (block == 1 && n_procs == 1))
952 cell_partition_data.push_back(n_cells);
954 if (cell_vectorization_categories_strict ==
true)
// Trim to the number of batches actually created.
965 incompletely_filled_vectorization.resize(cell_partition_data.back());
// Initial setup: records how many lanes of the last active and last ghost
// batch are filled, builds a numbering that places the boundary cells first,
// appends the ghost cells, and fills cell_partition_data and
// partition_row_index.
// NOTE(review): the function name line, some conditions and the inversion of
// reverse_numbering into `renumbering` are missing from this listing.
972 const std::vector<unsigned int> &boundary_cells,
973 std::vector<unsigned int> & renumbering,
974 std::vector<unsigned char> & incompletely_filled_vectorization)
976 const unsigned int n_macro_cells =
977 (n_active_cells + vectorization_length - 1) / vectorization_length;
978 const unsigned int n_ghost_slots =
979 (n_ghost_cells + vectorization_length - 1) / vectorization_length;
980 incompletely_filled_vectorization.resize(n_macro_cells + n_ghost_slots);
// Last active batch is partial: store its number of filled lanes.
981 if (n_macro_cells * vectorization_length > n_active_cells)
982 incompletely_filled_vectorization[n_macro_cells - 1] =
983 vectorization_length -
984 (n_macro_cells * vectorization_length - n_active_cells);
// Same for the last ghost batch.
985 if (n_ghost_slots * vectorization_length > n_ghost_cells)
986 incompletely_filled_vectorization[n_macro_cells + n_ghost_slots - 1] =
987 vectorization_length -
988 (n_ghost_slots * vectorization_length - n_ghost_cells);
990 std::vector<unsigned int> reverse_numbering(
// Boundary cells receive the first positions, in list order.
992 for (
unsigned int j = 0; j < boundary_cells.size(); ++j)
993 reverse_numbering[boundary_cells[j]] = j;
// Remaining active cells follow (the guard that selects them is not visible).
994 unsigned int counter = boundary_cells.size();
995 for (
unsigned int j = 0; j < n_active_cells; ++j)
997 reverse_numbering[j] = counter++;
// Ghost cells keep their position at the end.
1002 for (
unsigned int j = n_active_cells; j < n_active_cells + n_ghost_cells;
1004 renumbering.push_back(j);
1008 cell_partition_data.clear();
1009 cell_partition_data.push_back(0);
// Split points: half of the non-boundary batches, then the boundary batches
// (the condition guarding this section is not visible).
1012 const unsigned int n_macro_boundary_cells =
1013 (boundary_cells.size() + vectorization_length - 1) /
1014 vectorization_length;
1015 cell_partition_data.push_back(
1016 (n_macro_cells - n_macro_boundary_cells) / 2);
1017 cell_partition_data.push_back(cell_partition_data[1] +
1018 n_macro_boundary_cells);
1022 cell_partition_data.push_back(n_macro_cells);
1023 cell_partition_data.push_back(cell_partition_data.back() + n_ghost_slots);
1024 partition_row_index.resize(n_procs > 1 ? 4 : 2);
1025 partition_row_index[0] = 0;
1026 partition_row_index[1] = 1;
// NOTE(review): entries 2 and 3 exist only for the size-4 case; presumably
// guarded by n_procs > 1 on a line not visible here -- confirm.
1029 partition_row_index[2] = 2;
1030 partition_row_index[3] = 3;
// Chooses a block size from dofs_per_cell when none is set (block_size == 0)
// and caps it by the number of active cells.
// NOTE(review): the signature, the initial guess and brace lines are missing
// from this listing, so the exact nesting below cannot be confirmed.
1040 if (block_size == 0)
1045 vectorization_length);
// Make sure a block carries at least `minimum_parallel_grain_size` dofs.
1049 const unsigned int minimum_parallel_grain_size = 200;
1050 if (dofs_per_cell * block_size < minimum_parallel_grain_size)
1051 block_size = (minimum_parallel_grain_size / dofs_per_cell + 1);
// Upper limit of 10000 dofs per block (the reduction itself is not visible).
1052 if (dofs_per_cell * block_size > 10000)
// Largest power of two not exceeding block_size + 1.
1055 block_size = 1 << (
unsigned int)(log2(block_size + 1));
// Never more blocks' worth than there are active cells, but at least 1.
1057 if (block_size > n_active_cells)
1058 block_size = std::max(1U, n_active_cells);
// Builds the thread graph on blocks of batches: partitions the block
// connectivity, colors within partitions, then rewrites `renumbering`,
// `irregular_cells` and cell_partition_data in the new block order.
// NOTE(review): the function name line, the remaining parameters, most call
// arguments and brace lines are missing from this listing.
1066 std::vector<unsigned int> & renumbering,
1067 std::vector<unsigned char> &irregular_cells,
// Second-to-last entry of cell_partition_data is used as the batch count.
1070 const unsigned int n_macro_cells = *(cell_partition_data.end() - 2);
1071 if (n_macro_cells == 0)
1076 unsigned int partition = 0, counter = 0;
1081 make_connectivity_cells_to_blocks(irregular_cells,
1090 std::vector<unsigned int> cell_partition(n_blocks,
1095 std::vector<unsigned int> partition_list(n_blocks, 0);
1096 std::vector<unsigned int> partition_color_list(n_blocks, 0);
1099 std::vector<unsigned int> partition_size(2, 0);
1105 unsigned int cluster_size = 1;
1108 make_partitioning(connectivity,
1116 make_coloring_within_partitions_pre_blocked(connectivity,
1121 partition_color_list);
// Keep a copy of the incoming numbering; it is read back below.
1123 partition_list = renumbering;
// Sorted copy of the block order (the check in the loop is not visible).
1128 std::vector<unsigned int> sorted_pc_list(partition_color_list);
1129 std::sort(sorted_pc_list.begin(), sorted_pc_list.end());
1130 for (
unsigned int i = 0; i < sorted_pc_list.size(); ++i)
1137 std::vector<unsigned int> block_start(n_macro_cells + 1);
1138 std::vector<unsigned char> irregular(n_macro_cells);
1140 unsigned int mcell_start = 0;
// block_start[b] = index of the first cell of block b: each batch counts its
// irregular_cells entry if non-zero, otherwise a full vector length.
1142 for (
unsigned int block = 0; block < n_blocks; block++)
1144 block_start[block + 1] = block_start[block];
1145 for (
unsigned int mcell = mcell_start;
1146 mcell < std::min(mcell_start + block_size, n_macro_cells);
1149 unsigned int n_comp = (irregular_cells[mcell] > 0) ?
1150 irregular_cells[mcell] :
1151 vectorization_length;
1152 block_start[block + 1] += n_comp;
1155 mcell_start += block_size;
1158 unsigned int counter_macro = 0;
// Size of the last block; a zero remainder means it is a full block.
// NOTE(review): `n_blocks - 1` assumes n_blocks >= 1.
1159 unsigned int block_size_last =
1160 n_macro_cells - block_size * (n_blocks - 1);
1161 if (block_size_last == 0)
1162 block_size_last = block_size;
1164 unsigned int tick = 0;
// Copy cells and irregular flags block by block in the new order.
1165 for (
unsigned int block = 0; block < n_blocks; block++)
1167 unsigned int present_block = partition_color_list[block];
1168 for (
unsigned int cell = block_start[present_block];
1169 cell < block_start[present_block + 1];
1171 renumbering[counter++] = partition_list[cell];
1172 unsigned int this_block_size =
1173 (present_block == n_blocks - 1) ? block_size_last : block_size;
// Translate partition boundaries from block counts to batch counts.
1177 if (cell_partition_data[tick] == block)
1178 cell_partition_data[tick++] = counter_macro;
1180 for (
unsigned int j = 0; j < this_block_size; j++)
1181 irregular[counter_macro++] =
1182 irregular_cells[present_block * block_size + j];
1185 cell_partition_data.back() = counter_macro;
1187 irregular_cells.swap(irregular);
// Sorted copy of the result (the check in the loop is not visible).
1194 std::vector<unsigned int> sorted_renumbering(renumbering);
1195 std::sort(sorted_renumbering.begin(), sorted_renumbering.end());
1196 for (
unsigned int i = 0; i < sorted_renumbering.size(); ++i)
// Builds the thread graph for the selected scheme: partitions either the
// block connectivity (partition_color / color) or the cell connectivity,
// applies a second layer (partitioning or coloring within partitions),
// rewrites `renumbering` / `irregular_cells` / cell_partition_data and
// finishes with update_task_info().
// NOTE(review): the function name line, some parameters, most call arguments,
// several conditions and all brace lines are missing from this listing.
1212 const std::vector<unsigned int> &cell_active_fe_index,
1214 std::vector<unsigned int> & renumbering,
1215 std::vector<unsigned char> & irregular_cells,
// Second-to-last entry of cell_partition_data is used as the batch count.
1218 const unsigned int n_macro_cells = *(cell_partition_data.end() - 2);
1219 if (n_macro_cells == 0)
1227 make_connectivity_cells_to_blocks(irregular_cells,
1229 connectivity_blocks);
// Local n_blocks shadows the member: the member value for block-based
// schemes, the number of active cells otherwise.
1231 unsigned int n_blocks = 0;
1232 if (scheme == partition_color ||
1234 n_blocks = this->n_blocks;
1236 n_blocks = n_active_cells;
1241 std::vector<unsigned int> cell_partition(n_blocks,
1247 std::vector<unsigned int> partition_list(n_blocks, 0);
1248 std::vector<unsigned int> partition_2layers_list(n_blocks, 0);
1251 std::vector<unsigned int> partition_size(2, 0);
1253 unsigned int partition = 0;
// Clusters of block_size batches only for the partition_partition scheme.
1259 unsigned int cluster_size = 1;
1260 if (scheme == partition_partition)
1261 cluster_size = block_size * vectorization_length;
// First layer: partition on blocks or on cells.
1264 if (scheme == partition_color || scheme == color)
1265 make_partitioning(connectivity_blocks,
1272 make_partitioning(connectivity,
// Second layer.
1280 if (scheme == partition_partition)
1284 make_partitioning_within_partitions_post_blocked(
1286 cell_active_fe_index,
1293 partition_2layers_list,
1296 else if (scheme == partition_color || scheme == color)
1298 make_coloring_within_partitions_pre_blocked(connectivity_blocks,
1303 partition_2layers_list);
// Sorted copy of the second-layer order (the check is not visible).
1309 std::vector<unsigned int> sorted_pc_list(partition_2layers_list);
1310 std::sort(sorted_pc_list.begin(), sorted_pc_list.end());
1311 for (
unsigned int i = 0; i < sorted_pc_list.size(); ++i)
// Move the incoming numbering aside; `renumbering` now has n_active_cells
// zero entries to be overwritten.
1317 std::vector<unsigned int> renumbering_in(n_active_cells, 0);
1318 renumbering_in.swap(renumbering);
// Cell-based scheme: compose the two numberings and append the ghost cells.
1319 if (scheme == partition_partition)
1324 for (
unsigned int j = 0; j < renumbering.size(); j++)
1325 renumbering[j] = renumbering_in[partition_2layers_list[j]];
1327 for (
unsigned int i = 0; i < n_ghost_cells; ++i)
1328 renumbering.push_back(i + n_active_cells);
// Block-based schemes: reorder whole blocks.
1334 std::vector<unsigned int> block_start(n_macro_cells + 1);
1335 std::vector<unsigned char> irregular(n_macro_cells);
1337 unsigned int counter = 0;
1338 unsigned int mcell_start = 0;
// block_start[b] = index of the first cell of block b: each batch counts its
// irregular_cells entry if non-zero, otherwise a full vector length.
1340 for (
unsigned int block = 0; block < n_blocks; block++)
1342 block_start[block + 1] = block_start[block];
1343 for (
unsigned int mcell = mcell_start;
1344 mcell < std::min(mcell_start + block_size, n_macro_cells);
1347 unsigned int n_comp = (irregular_cells[mcell] > 0) ?
1348 irregular_cells[mcell] :
1349 vectorization_length;
1350 block_start[block + 1] += n_comp;
1353 mcell_start += block_size;
1356 unsigned int counter_macro = 0;
// Size of the last block; a zero remainder means it is a full block.
// NOTE(review): `n_blocks - 1` assumes n_blocks >= 1.
1357 unsigned int block_size_last =
1358 n_macro_cells - block_size * (n_blocks - 1);
1359 if (block_size_last == 0)
1360 block_size_last = block_size;
1362 unsigned int tick = 0;
1363 for (
unsigned int block = 0; block < n_blocks; block++)
1365 unsigned int present_block = partition_2layers_list[block];
1366 for (
unsigned int cell = block_start[present_block];
1367 cell < block_start[present_block + 1];
1369 renumbering[counter++] = renumbering_in[cell];
1370 unsigned int this_block_size =
1371 (present_block == n_blocks - 1) ? block_size_last : block_size;
// Translate partition boundaries from block counts to batch counts.
1375 if (cell_partition_data[tick] == block)
1376 cell_partition_data[tick++] = counter_macro;
1378 for (
unsigned int j = 0; j < this_block_size; j++)
1379 irregular[counter_macro++] =
1380 irregular_cells[present_block * block_size + j];
1383 cell_partition_data.back() = counter_macro;
1385 irregular_cells.swap(irregular);
// Sorted copy of the result (the check in the loop is not visible).
1391 std::vector<unsigned int> sorted_renumbering(renumbering);
1392 std::sort(sorted_renumbering.begin(), sorted_renumbering.end());
1393 for (
unsigned int i = 0; i < sorted_renumbering.size(); ++i)
1400 update_task_info(partition);
// Cell-based two-level partitioning: partitions the cell connectivity,
// partitions again within each partition, composes the result with the
// incoming numbering, appends the ghost cells and calls update_task_info().
// NOTE(review): the function name line, some parameters, most call arguments
// and brace lines are missing from this listing.
1407 const std::vector<unsigned int> &cell_active_fe_index,
1409 std::vector<unsigned int> & renumbering,
1410 std::vector<unsigned char> & irregular_cells,
// Second-to-last entry of cell_partition_data is used as the batch count.
1413 const unsigned int n_macro_cells = *(cell_partition_data.end() - 2);
1414 if (n_macro_cells == 0)
// One cluster = block_size batches of vectorization_length cells.
1417 const unsigned int cluster_size = block_size * vectorization_length;
1424 std::vector<unsigned int> cell_partition(n_active_cells,
1430 std::vector<unsigned int> partition_list(n_active_cells, 0);
1431 std::vector<unsigned int> partition_partition_list(n_active_cells, 0);
1434 std::vector<unsigned int> partition_size(2, 0);
1436 unsigned int partition = 0;
1441 make_partitioning(connectivity,
1449 make_partitioning_within_partitions_post_blocked(connectivity,
1450 cell_active_fe_index,
1457 partition_partition_list,
// After the swap, partition_list holds the incoming numbering and
// `renumbering` holds what partition_list contained before.
1460 partition_list.swap(renumbering);
// Compose: new position j -> second-level index -> incoming numbering.
1462 for (
unsigned int j = 0; j < renumbering.size(); j++)
1463 renumbering[j] = partition_list[partition_partition_list[j]];
// Ghost cells are appended unchanged after the active cells.
1465 for (
unsigned int i = 0; i < n_ghost_cells; ++i)
1466 renumbering.push_back(i + n_active_cells);
1468 update_task_info(partition);
// Derives a block-to-block connectivity from the cell-to-cell connectivity:
// cells are assigned to blocks of up to block_size batches, and for every
// cell neighbor that lies in a different block an entry (block, other block)
// is added to `connectivity_blocks`.
// NOTE(review): the function name line, the connectivity parameters, loop
// increments and brace lines are missing from this listing.
1475 const std::vector<unsigned char> &irregular_cells,
1479 std::vector<std::vector<unsigned int>> cell_blocks(n_blocks);
// touched_cells[c] = block that cell c was assigned to.
1480 std::vector<unsigned int> touched_cells(n_active_cells);
1481 unsigned int cell = 0;
1482 for (
unsigned int i = 0, mcell = 0; i < n_blocks; ++i)
// Up to block_size batches per block, bounded by the batch count stored in
// the second-to-last entry of cell_partition_data.
1484 for (
unsigned int c = 0;
1485 c < block_size && mcell < *(cell_partition_data.end() - 2);
// Cells in this batch: irregular_cells entry if non-zero, otherwise a full
// vector length.
1488 unsigned int ncomp = (irregular_cells[mcell] > 0) ?
1489 irregular_cells[mcell] :
1490 vectorization_length;
// This inner `c` shadows the batch counter of the enclosing loop.
1491 for (
unsigned int c = 0; c < ncomp; ++c, ++cell)
1493 cell_blocks[i].push_back(cell);
1494 touched_cells[cell] = i;
// Visit every cell of every block and walk its row in connectivity_cells.
1499 for (
unsigned int i = 0; i < cell_blocks.size(); ++i)
1500 for (
unsigned int col = 0; col < cell_blocks[i].size(); ++col)
1503 connectivity_cells.
begin(cell_blocks[i][col]);
1504 it != connectivity_cells.
end(cell_blocks[i][col]);
// Only couplings between different blocks are recorded.
1507 if (touched_cells[it->column()] != i)
1508 connectivity_blocks.
add(i, touched_cells[it->column()]);
1520 const std::vector<unsigned int> &cell_active_fe_index,
1521 const unsigned int partition,
1522 const unsigned int cluster_size,
1524 const std::vector<unsigned int> &cell_partition,
1525 const std::vector<unsigned int> &partition_list,
1526 const std::vector<unsigned int> &partition_size,
1527 std::vector<unsigned int> & partition_partition_list,
1528 std::vector<unsigned char> & irregular_cells)
1530 const unsigned int n_macro_cells = *(cell_partition_data.end() - 2);
1531 const unsigned int n_ghost_slots =
1532 *(cell_partition_data.end() - 1) - n_macro_cells;
1535 std::vector<unsigned int> neighbor_list;
1538 std::vector<unsigned int> neighbor_neighbor_list;
1540 std::vector<unsigned int> renumbering(n_active_cells);
1542 irregular_cells.back() = 0;
1543 irregular_cells.resize(n_active_cells + n_ghost_slots);
1545 unsigned int max_fe_index = 0;
1546 for (
unsigned int i = 0; i < cell_active_fe_index.size(); ++i)
1547 max_fe_index = std::max(cell_active_fe_index[i], max_fe_index);
1548 Assert(!hp_bool || cell_active_fe_index.size() == n_active_cells,
1552 unsigned int n_macro_cells_before = 0;
1558 std::vector<unsigned int> cell_partition_l2(
1560 partition_row_index.clear();
1561 partition_row_index.resize(partition + 1, 0);
1562 cell_partition_data.resize(1, 0);
1564 unsigned int counter = 0;
1565 unsigned int missing_macros;
1566 for (
unsigned int part = 0; part < partition; ++part)
1568 neighbor_neighbor_list.resize(0);
1569 neighbor_list.resize(0);
1571 unsigned int partition_l2 = 0;
1572 unsigned int start_up = partition_size[part];
1573 unsigned int partition_counter = 0;
1576 if (neighbor_list.size() == 0)
1579 partition_counter = 0;
1580 for (
unsigned int j = start_up;
1581 j < partition_size[part + 1];
1583 if (cell_partition[partition_list[j]] == part &&
1584 cell_partition_l2[partition_list[j]] ==
1589 partition_counter = 1;
1593 cell_partition_l2[partition_list[start_up]] =
1595 neighbor_neighbor_list.push_back(
1596 partition_list[start_up]);
1597 partition_partition_list[counter++] =
1598 partition_list[start_up];
1605 partition_counter = 0;
1606 for (
unsigned int j = 0; j < neighbor_list.size(); ++j)
1608 Assert(cell_partition[neighbor_list[j]] == part,
1610 Assert(cell_partition_l2[neighbor_list[j]] ==
1616 end = connectivity.
end(
1618 for (; neighbor != end; ++neighbor)
1620 if (cell_partition[neighbor->
column()] == part &&
1621 cell_partition_l2[neighbor->
column()] ==
1624 cell_partition_l2[neighbor->
column()] =
1626 neighbor_neighbor_list.push_back(
1628 partition_partition_list[counter++] =
1630 partition_counter++;
1635 if (partition_counter > 0)
1637 int index_before = neighbor_neighbor_list.size(),
1638 index = index_before;
1643 std::vector<unsigned int> remaining_per_macro_cell(
1645 std::vector<std::vector<unsigned int>>
1646 renumbering_fe_index;
1649 if (hp_bool ==
true)
1651 renumbering_fe_index.resize(max_fe_index + 1);
1652 for (cell = counter - partition_counter;
1656 renumbering_fe_index
1657 [cell_active_fe_index.empty() ?
1659 cell_active_fe_index
1660 [partition_partition_list[cell]]]
1661 .push_back(partition_partition_list[cell]);
1664 for (
unsigned int j = 0; j < max_fe_index + 1; j++)
1666 remaining_per_macro_cell[j] =
1667 renumbering_fe_index[j].size() %
1668 vectorization_length;
1669 if (remaining_per_macro_cell[j] != 0)
1672 ((renumbering_fe_index[j].size() +
1673 vectorization_length - 1) /
1674 vectorization_length);
1679 remaining_per_macro_cell.resize(1);
1680 remaining_per_macro_cell[0] =
1681 partition_counter % vectorization_length;
1683 partition_counter / vectorization_length;
1684 if (remaining_per_macro_cell[0] != 0)
1691 cluster_size - (missing_macros % cluster_size);
1694 while (missing_macros > 0 || filled ==
false)
1698 index = neighbor_neighbor_list.size();
1699 if (index == index_before)
1701 if (missing_macros != 0)
1703 neighbor_neighbor_list.resize(0);
1708 index_before = index;
1711 unsigned int additional =
1712 neighbor_neighbor_list[index];
1723 for (; neighbor != end; ++neighbor)
1725 if (cell_partition[neighbor->
column()] == part &&
1726 cell_partition_l2[neighbor->
column()] ==
1729 unsigned int this_index = 0;
1730 if (hp_bool ==
true)
1732 cell_active_fe_index.empty() ?
1734 cell_active_fe_index[neighbor
1741 if (missing_macros > 0 ||
1742 remaining_per_macro_cell[this_index] > 0)
1744 cell_partition_l2[neighbor->
column()] =
1746 neighbor_neighbor_list.push_back(
1748 if (hp_bool ==
true)
1749 renumbering_fe_index[this_index]
1750 .push_back(neighbor->
column());
1751 partition_partition_list[counter] =
1754 partition_counter++;
1755 if (remaining_per_macro_cell
1756 [this_index] == 0 &&
1759 remaining_per_macro_cell[this_index]++;
1760 if (remaining_per_macro_cell
1762 vectorization_length)
1764 remaining_per_macro_cell[this_index] =
1767 if (missing_macros == 0)
1770 for (
unsigned int fe_ind = 0;
1771 fe_ind < max_fe_index + 1;
1773 if (remaining_per_macro_cell
1783 if (hp_bool ==
true)
1788 cell = counter - partition_counter;
1789 for (
unsigned int j = 0; j < max_fe_index + 1; j++)
1791 for (
unsigned int jj = 0;
1792 jj < renumbering_fe_index[j].size();
1794 renumbering[cell++] =
1795 renumbering_fe_index[j][jj];
1796 if (renumbering_fe_index[j].size() %
1797 vectorization_length !=
1799 irregular_cells[renumbering_fe_index[j].size() /
1800 vectorization_length +
1801 n_macro_cells_before] =
1802 renumbering_fe_index[j].size() %
1803 vectorization_length;
1804 n_macro_cells_before +=
1805 (renumbering_fe_index[j].size() +
1806 vectorization_length - 1) /
1807 vectorization_length;
1808 renumbering_fe_index[j].resize(0);
1813 n_macro_cells_before +=
1814 partition_counter / vectorization_length;
1815 if (partition_counter % vectorization_length != 0)
1817 irregular_cells[n_macro_cells_before] =
1818 partition_counter % vectorization_length;
1819 n_macro_cells_before++;
1823 cell_partition_data.push_back(n_macro_cells_before);
1826 neighbor_list = neighbor_neighbor_list;
1827 neighbor_neighbor_list.resize(0);
1829 partition_row_index[part + 1] =
1830 partition_row_index[part] + partition_l2;
1833 if (hp_bool ==
true)
1835 partition_partition_list.swap(renumbering);
// Colors the blocks inside every partition and writes them, grouped by
// color, to partition_color_list. The parameter list matches
// make_coloring_within_partitions_pre_blocked(); the header line and the
// leading `connectivity` parameter are not part of this excerpt.
//   partition            number of partitions that are processed
//   cell_partition       partition id of each block
//   partition_list       block ids, stored partition after partition
//   partition_size       partition p covers the partition_list entries
//                        [partition_size[p], partition_size[p + 1])
//   partition_color_list output: block ids ordered by partition, then color
1846 const unsigned int partition,
1847 const std::vector<unsigned int> &cell_partition,
1848 const std::vector<unsigned int> &partition_list,
1849 const std::vector<unsigned int> &partition_size,
1850 std::vector<unsigned int> & partition_color_list)
// n_macro_cells is read from the second-to-last entry of
// cell_partition_data; every one of the n_blocks entries of cell_color
// starts out with that value.
1852 const unsigned int n_macro_cells = *(cell_partition_data.end() - 2);
1853 std::vector<unsigned int> cell_color(n_blocks, n_macro_cells);
1854 std::vector<bool> color_finder;
// Both member arrays are rebuilt below: partition_row_index gets one entry
// per partition plus a final one, cell_partition_data is refilled through
// push_back().
1856 partition_row_index.resize(partition + 1);
1857 cell_partition_data.clear();
1858 unsigned int color_counter = 0, index_counter = 0;
1859 for (
unsigned int part = 0; part < partition; part++)
// Remember where the entries of this partition start.
1861 partition_row_index[part] = index_counter;
1862 unsigned int max_color = 0;
// First pass over the blocks of this partition: pick a color for each.
1863 for (
unsigned int k = partition_size[part];
1864 k < partition_size[part + 1];
1867 unsigned int cell = partition_list[k];
1868 unsigned int n_neighbors = connectivity.
row_length(cell);
// One flag per candidate color 0..n_neighbors, all reset to "available".
1872 color_finder.resize(n_neighbors + 1);
1873 for (
unsigned int j = 0; j <= n_neighbors; ++j)
1874 color_finder[j] =
true;
1876 connectivity.
begin(cell),
1877 end = connectivity.
end(cell);
// Walk over the connectivity row of this block.
1878 for (; neighbor != end; ++neighbor)
// A neighbor only blocks a color if it lies in the same partition. The
// `<= n_neighbors` test also keeps the index inside color_finder, which
// has n_neighbors + 1 entries.
1882 if (cell_partition[neighbor->
column()] == part &&
1883 cell_color[neighbor->
column()] <= n_neighbors)
1884 color_finder[cell_color[neighbor->
column()]] =
false;
// Start at color 0 and keep going while the color is flagged as taken
// (the statement that advances the color is not visible in this excerpt).
1887 cell_color[cell] = 0;
1888 while (color_finder[cell_color[cell]] ==
false)
// Track the largest color handed out in this partition.
1890 if (cell_color[cell] > max_color)
1891 max_color = cell_color[cell];
// Second pass: for each color in ascending order, record the current
// write position in cell_partition_data and then append all blocks of this
// partition that carry the color.
1896 for (
unsigned int color = 0; color <= max_color; color++)
1898 cell_partition_data.push_back(color_counter);
1900 for (
unsigned int k = partition_size[part];
1901 k < partition_size[part + 1];
1904 unsigned int cell = partition_list[k];
1905 if (cell_color[cell] == color)
1907 partition_color_list[color_counter++] = cell;
// Close both index arrays with their final entries.
1912 cell_partition_data.push_back(n_blocks);
1913 partition_row_index[partition] = index_counter;
// Assigns blocks to partitions by walking the connectivity graph: a
// partition is seeded, and each following partition is collected from the
// neighbors of the blocks of the previous one. The parameter list matches
// make_partitioning(); the header line and the leading `connectivity`
// parameter are not part of this excerpt.
//   cluster_size   granularity used for the fill-up counter `remainder`
//   cell_partition output: partition id written for each assigned block
//   partition_list output: block ids in the order they were assigned
//   partition_size output: its last entry is incremented for every block
//                  assigned; a new entry is appended per further partition
//   partition      in/out: id written into cell_partition (the statements
//                  that advance it are not visible in this excerpt)
1921 const unsigned int cluster_size,
1922 std::vector<unsigned int> & cell_partition,
1923 std::vector<unsigned int> & partition_list,
1924 std::vector<unsigned int> & partition_size,
1925 unsigned int & partition)
const 1934 std::vector<unsigned int> neighbor_list;
1937 std::vector<unsigned int> neighbor_neighbor_list;
// Write position inside partition_list.
1947 unsigned int counter = 0;
// With exactly five entries in cell_partition_data the start value is the
// length of the range [1], [2] times vectorization_length (the alternative
// branch of the conditional is not visible in this excerpt).
1948 unsigned int start_nonboundary =
1949 cell_partition_data.size() == 5 ?
1950 vectorization_length *
1951 (cell_partition_data[2] - cell_partition_data[1]) :
1954 const unsigned int n_macro_cells = *(cell_partition_data.end() - 2);
// NOTE(review): the statement guarded by this test is not visible here;
// presumably an early exit for the empty case -- confirm.
1955 if (n_macro_cells == 0)
1957 if (scheme == color)
1958 start_nonboundary = n_macro_cells;
// Convert the count into units of blocks, rounding up.
1959 if (scheme == partition_color ||
1961 start_nonboundary = ((start_nonboundary + block_size - 1) / block_size);
// Local n_blocks deliberately shadows the member of the same name: it is
// either the member value or n_active_cells, depending on the scheme.
1962 unsigned int n_blocks;
1963 if (scheme == partition_color ||
1965 n_blocks = this->n_blocks;
1967 n_blocks = n_active_cells;
// Never seed with more blocks than exist.
1969 if (start_nonboundary > n_blocks)
1970 start_nonboundary = n_blocks;
1973 unsigned int start_up = 0;
1975 unsigned int remainder = cluster_size;
// Seed the partition with the first start_nonboundary blocks, if any.
1983 if (start_nonboundary > 0)
1985 for (
unsigned int cell = 0; cell < start_nonboundary; ++cell)
1987 const unsigned int cell_nn = cell;
1988 cell_partition[cell_nn] = partition;
1989 neighbor_list.push_back(cell_nn);
1990 partition_list[counter++] = cell_nn;
1991 partition_size.back()++;
// NOTE(review): start_nonboundary is set to 0 right before it is used in
// the modulo below, so the subtraction always removes 0 and remainder
// stays at cluster_size. Confirm whether the subtraction was meant to use
// the value from before the reset.
1993 start_nonboundary = 0;
1994 remainder -= (start_nonboundary % cluster_size);
1995 if (remainder == cluster_size)
// Seed the partition with the single block start_up.
2002 cell_partition[start_up] = partition;
2003 neighbor_list.push_back(start_up);
2004 partition_list[counter++] = start_up;
2005 partition_size.back()++;
2008 if (remainder == cluster_size)
2011 int index_before = neighbor_list.size(), index = index_before,
// Fill-up loop: while remainder is positive, take entries of neighbor_list
// and add those of their neighbors that pass the cell_partition test to
// the current partition.
2013 while (remainder > 0)
2015 if (index == index_stop)
2017 index = neighbor_list.size();
// No entry was appended since the last round: nothing left to expand.
2018 if (index == index_before)
2020 neighbor_list.resize(0);
2023 index_stop = index_before;
2024 index_before = index;
2027 unsigned int additional = neighbor_list[index];
2029 connectivity.
begin(additional),
2031 connectivity.
end(additional);
2032 for (; neighbor != end; ++neighbor)
// The value cell_partition is compared against is on a line not visible
// here; presumably the marker for "not yet assigned" -- confirm.
2034 if (cell_partition[neighbor->
column()] ==
2037 partition_size.back()++;
2038 cell_partition[neighbor->
column()] = partition;
2039 neighbor_list.push_back(neighbor->
column());
2040 partition_list[counter++] = neighbor->
column();
// Build further partitions as long as the previous one left blocks in
// neighbor_list.
2048 while (neighbor_list.size() > 0)
2053 unsigned int partition_counter = 0;
// The new partition starts where the previous one ended.
2056 partition_size.push_back(partition_size.back());
2060 for (
unsigned int j = 0; j < neighbor_list.size(); ++j)
// Every entry of neighbor_list must belong to the preceding partition.
2062 Assert(cell_partition[neighbor_list[j]] == partition - 1,
2067 end = connectivity.
end(
2069 for (; neighbor != end; ++neighbor)
2071 if (cell_partition[neighbor->
column()] ==
2074 partition_size.back()++;
2075 cell_partition[neighbor->
column()] = partition;
// Collected separately so that it can become the next neighbor_list.
2079 neighbor_neighbor_list.push_back(neighbor->
column());
2080 partition_list[counter++] = neighbor->
column();
2081 partition_counter++;
// Distance of partition_counter to the next multiple of cluster_size.
2085 remainder = cluster_size - (partition_counter % cluster_size);
2086 if (remainder == cluster_size)
2089 int index_before = neighbor_neighbor_list.size(),
2090 index = index_before;
// Same fill-up scheme as above, now operating on neighbor_neighbor_list.
2091 while (remainder > 0)
2093 if (index == index_stop)
2095 index = neighbor_neighbor_list.size();
2096 if (index == index_before)
2098 neighbor_neighbor_list.resize(0);
2101 index_stop = index_before;
2102 index_before = index;
2105 unsigned int additional = neighbor_neighbor_list[index];
2109 end = connectivity.
end(
2111 for (; neighbor != end; ++neighbor)
2113 if (cell_partition[neighbor->
column()] ==
2116 partition_size.back()++;
2117 cell_partition[neighbor->
column()] = partition;
2118 neighbor_neighbor_list.push_back(neighbor->
column());
2119 partition_list[counter++] = neighbor->
column();
// The blocks just collected seed the next round.
2127 neighbor_list = neighbor_neighbor_list;
2128 neighbor_neighbor_list.resize(0);
// Scan the blocks from start_up onwards (the loop body is not visible in
// this excerpt) and reset the fill-up counter.
2134 for (
unsigned int j = start_up; j < n_blocks; ++j)
2140 remainder = cluster_size;
// Recomputes the evens/odds split and the worker counts, once for the
// whole set of partitions and once per partition. These statements match
// update_task_info(const unsigned int partition); the function header is
// not part of this excerpt.
// evens is partition / 2 rounded up, odds is partition / 2 rounded down.
2154 evens = (partition + 1) / 2;
2155 odds = partition / 2;
// evens + odds equals partition, so the modulo term is 1 for an even and 0
// for an odd number of partitions: n_blocked_workers is odds - 1 or odds.
// NOTE(review): for partition == 0 this evaluates 0 - 1; the outcome
// depends on the member's type, which is declared elsewhere -- confirm.
2156 n_blocked_workers = odds - (odds + evens + 1) % 2;
2157 n_workers = evens + odds - n_blocked_workers;
// One entry per partition in each of the per-partition arrays.
2159 partition_evens.resize(partition);
2160 partition_odds.resize(partition);
2161 partition_n_blocked_workers.resize(partition);
2162 partition_n_workers.resize(partition);
2163 for (
unsigned int part = 0; part < partition; part++)
// Same formulas as above, applied to the number of partition_row_index
// entries that belong to this partition.
2165 partition_evens[part] =
2166 (partition_row_index[part + 1] - partition_row_index[part] + 1) / 2;
2167 partition_odds[part] =
2168 (partition_row_index[part + 1] - partition_row_index[part]) / 2;
2169 partition_n_blocked_workers[part] =
2170 partition_odds[part] -
2171 (partition_odds[part] + partition_evens[part] + 1) % 2;
2172 partition_n_workers[part] = partition_evens[part] +
2173 partition_odds[part] -
2174 partition_n_blocked_workers[part];
// Declaration of TaskInfo::print_memory_statistics for StreamType =
// std::ostream; presumably an explicit template instantiation (the leading
// keywords and the first argument type are on lines not visible here).
2184 internal::MatrixFreeFunctions::TaskInfo::print_memory_statistics<std::ostream>(
2186 const std::size_t)
const;
2192 DEAL_II_NAMESPACE_CLOSE
size_type row_length(const size_type row) const
static const unsigned int invalid_unsigned_int
#define AssertDimension(dim1, dim2)
void print_memory_statistics(StreamType &out, std::size_t data_length) const
virtual void cell(const std::pair< unsigned int, unsigned int > &cell_range)=0
void add(const size_type i, const size_type j)
void guess_block_size(const unsigned int dofs_per_cell)
#define AssertIndexRange(index, range)
std::size_t memory_consumption() const
virtual void vector_update_ghosts_finish()=0
Finishes the communication for the update ghost values operation.
#define AssertThrow(cond, exc)
void make_partitioning_within_partitions_post_blocked(const DynamicSparsityPattern &connectivity, const std::vector< unsigned int > &cell_active_fe_index, const unsigned int partition, const unsigned int cluster_size, const bool hp_bool, const std::vector< unsigned int > &cell_partition, const std::vector< unsigned int > &partition_list, const std::vector< unsigned int > &partition_size, std::vector< unsigned int > &partition_partition_list, std::vector< unsigned char > &irregular_cells)
void make_coloring_within_partitions_pre_blocked(const DynamicSparsityPattern &connectivity, const unsigned int partition, const std::vector< unsigned int > &cell_partition, const std::vector< unsigned int > &partition_list, const std::vector< unsigned int > &partition_size, std::vector< unsigned int > &partition_color_list)
void loop(MFWorkerInterface &worker) const
#define Assert(cond, exc)
void make_thread_graph_partition_color(DynamicSparsityPattern &connectivity, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &irregular_cells, const bool hp_bool)
void update_task_info(const unsigned int partition)
void initial_setup_blocks_tasks(const std::vector< unsigned int > &boundary_cells, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &incompletely_filled_vectorization)
virtual void zero_dst_vector_range(const unsigned int range_index)=0
virtual void boundary(const std::pair< unsigned int, unsigned int > &face_range)=0
void create_blocks_serial(const std::vector< unsigned int > &boundary_cells, const std::vector< unsigned int > &cells_close_to_boundary, const unsigned int dofs_per_cell, const std::vector< unsigned int > &cell_vectorization_categories, const bool cell_vectorization_categories_strict, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &incompletely_filled_vectorization)
std::vector< unsigned int > invert_permutation(const std::vector< unsigned int > &permutation)
void make_connectivity_cells_to_blocks(const std::vector< unsigned char > &irregular_cells, const DynamicSparsityPattern &connectivity_cells, DynamicSparsityPattern &connectivity_blocks) const
virtual void vector_update_ghosts_start()=0
Starts the communication for the update ghost values operation.
virtual void face(const std::pair< unsigned int, unsigned int > &face_range)=0
void collect_boundary_cells(const unsigned int n_active_cells, const unsigned int n_active_and_ghost_cells, const unsigned int vectorization_length, std::vector< unsigned int > &boundary_cells)
void make_thread_graph_partition_partition(const std::vector< unsigned int > &cell_active_fe_index, DynamicSparsityPattern &connectivity, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &irregular_cells, const bool hp_bool)
virtual void vector_compress_start()=0
Starts the communication for the vector compress operation.
void make_partitioning(const DynamicSparsityPattern &connectivity, const unsigned int cluster_size, std::vector< unsigned int > &cell_partition, std::vector< unsigned int > &partition_list, std::vector< unsigned int > &partition_size, unsigned int &partition) const
static::ExceptionBase & ExcNotImplemented()
static unsigned int n_threads()
MinMaxAvg min_max_avg(const double my_value, const MPI_Comm &mpi_communicator)
std::enable_if< std::is_fundamental< T >::value, std::size_t >::type memory_consumption(const T &t)
static::ExceptionBase & ExcInternalError()
void make_thread_graph(const std::vector< unsigned int > &cell_active_fe_index, DynamicSparsityPattern &connectivity, std::vector< unsigned int > &renumbering, std::vector< unsigned char > &irregular_cells, const bool hp_bool)
virtual void vector_compress_finish()=0
Finishes the communication for the vector compress operation.