Reference documentation for deal.II version 9.1.0-pre
parallel.h
1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2008 - 2018 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 // ---------------------------------------------------------------------
15 
16 #ifndef dealii_parallel_h
17 #define dealii_parallel_h
18 
19 
20 #include <deal.II/base/config.h>
21 
22 #include <deal.II/base/exceptions.h>
23 #include <deal.II/base/synchronous_iterator.h>
24 #include <deal.II/base/template_constraints.h>
25 #include <deal.II/base/thread_management.h>
26 
27 #include <cstddef>
28 #include <functional>
29 #include <memory>
30 #include <tuple>
31 
32 #ifdef DEAL_II_WITH_THREADS
33 # include <tbb/blocked_range.h>
34 # include <tbb/parallel_for.h>
35 # include <tbb/parallel_reduce.h>
36 # include <tbb/partitioner.h>
37 #endif
38 
39 
40 // TODO[WB]: allow calling functions to pass along a tbb::affinity_partitioner
41 // object to ensure that subsequent calls use the same cache lines
42 
43 DEAL_II_NAMESPACE_OPEN
44 
45 namespace parallel
46 {
47  namespace internal
48  {
/**
 * Compile-time switch keyed on a number type. The generic template
 * reports @p true; judging by its name and the comment on the
 * specialization below, the flag controls whether OpenMP SIMD is enabled
 * for loops over that number type.
 *
 * The <code>struct EnableOpenMPSimdFor</code> header of the primary
 * template was missing from this listing and is restored here; the name is
 * the one used by the explicit specialization below.
 */
template <typename Number>
struct EnableOpenMPSimdFor
{
  static const bool value = true;
};

#ifdef __INTEL_COMPILER
// Disable long double SIMD instructions on ICC. This is to work around a
// bug that generates wrong code at least up to intel 15 (see
// tests/lac/vector-vector, tests/lac/intel-15-bug, and the discussion at
// https://github.com/dealii/dealii/issues/598).
template <>
struct EnableOpenMPSimdFor<long double>
{
  static const bool value = false;
};
#endif
70 
71 
72 
/**
 * Function object that walks over a range of iterator tuples and, for each
 * tuple, calls the stored function on the dereferenced leading iterators
 * and assigns the result through the last iterator of the tuple.
 *
 * Tuples of two, three and four iterators are supported, corresponding to
 * functions of one, two and three arguments.
 */
template <typename F>
struct Body
{
  /**
   * Constructor. Keeps a copy of the function object @p f.
   */
  Body(const F &f)
    : function(f)
  {}

  /**
   * Visit every element of @p range in order, from <code>range.begin()</code>
   * up to (but excluding) <code>range.end()</code>, and apply the stored
   * function to the iterator tuple each element dereferences to.
   */
  template <typename Range>
  void
  operator()(const Range &range) const
  {
    using Iterator = typename Range::const_iterator;

    Iterator current = range.begin();
    while (current != range.end())
      {
        apply(function, *current);
        ++current;
      }
  }

private:
  /**
   * Copy of the function object given to the constructor.
   */
  const F function;

  /**
   * Unary case: <code>*out = f(*in)</code>.
   */
  template <typename I1, typename I2>
  static void
  apply(const F &f, const std::tuple<I1, I2> &p)
  {
    *std::get<1>(p) = f(*std::get<0>(p));
  }

  /**
   * Binary case: <code>*out = f(*in1, *in2)</code>.
   */
  template <typename I1, typename I2, typename I3>
  static void
  apply(const F &f, const std::tuple<I1, I2, I3> &p)
  {
    *std::get<2>(p) = f(*std::get<0>(p), *std::get<1>(p));
  }

  /**
   * Ternary case: <code>*out = f(*in1, *in2, *in3)</code>.
   */
  template <typename I1, typename I2, typename I3, typename I4>
  static void
  apply(const F &f, const std::tuple<I1, I2, I3, I4> &p)
  {
    *std::get<3>(p) = f(*std::get<0>(p), *std::get<1>(p), *std::get<2>(p));
  }
};
132 
133 
140  template <typename F>
141  Body<F>
142  make_body(const F &f)
143  {
144  return Body<F>(f);
145  }
146  } // namespace internal
147 
/**
 * Apply @p predicate to every element of the range
 * <code>[begin_in, end_in)</code> and store the results in consecutive
 * positions starting at @p out.
 *
 * Without DEAL_II_WITH_THREADS this is a plain sequential loop and
 * @p grainsize is ignored. With threads, the range is split into chunks by
 * tbb::parallel_for using a tbb::blocked_range with the given
 * @p grainsize; the end marker for the output iterator is
 * default-constructed.
 *
 * @param begin_in   First element of the input range.
 * @param end_in     One past the last element of the input range.
 * @param out        Start of the output range.
 * @param predicate  Unary function object applied to each input element.
 * @param grainsize  Grain size handed to tbb::blocked_range.
 */
template <typename InputIterator, typename OutputIterator, typename Predicate>
void
transform(const InputIterator &begin_in,
          const InputIterator &end_in,
          OutputIterator       out,
          Predicate &          predicate,
          const unsigned int   grainsize)
{
#ifndef DEAL_II_WITH_THREADS
  // make sure we don't get compiler
  // warnings about unused arguments
  (void)grainsize;

  // The loop variable walks the *input* range and therefore must have the
  // input iterator type; declaring it as OutputIterator only works when
  // both iterator types happen to coincide.
  for (InputIterator in = begin_in; in != end_in;)
    *out++ = predicate(*in++);
#else
  using Iterators     = std::tuple<InputIterator, OutputIterator>;
  using SyncIterators = SynchronousIterators<Iterators>;
  Iterators x_begin(begin_in, out);
  Iterators x_end(end_in, OutputIterator());
  tbb::parallel_for(tbb::blocked_range<SyncIterators>(x_begin,
                                                      x_end,
                                                      grainsize),
                    internal::make_body(predicate),
                    tbb::auto_partitioner());
#endif
}
198 
199 
200 
/**
 * Apply the binary @p predicate to corresponding elements of the range
 * <code>[begin_in1, end_in1)</code> and of the sequence starting at @p in2,
 * and store the results in consecutive positions starting at @p out.
 *
 * Without DEAL_II_WITH_THREADS this is a plain sequential loop and
 * @p grainsize is ignored. With threads, the range is split into chunks by
 * tbb::parallel_for using a tbb::blocked_range with the given
 * @p grainsize; the end markers for the second input and the output
 * iterator are default-constructed.
 *
 * @param begin_in1  First element of the first input range.
 * @param end_in1    One past the last element of the first input range.
 * @param in2        Start of the second input sequence.
 * @param out        Start of the output range.
 * @param predicate  Binary function object applied to each pair of inputs.
 * @param grainsize  Grain size handed to tbb::blocked_range.
 */
template <typename InputIterator1,
          typename InputIterator2,
          typename OutputIterator,
          typename Predicate>
void
transform(const InputIterator1 &begin_in1,
          const InputIterator1 &end_in1,
          InputIterator2        in2,
          OutputIterator        out,
          Predicate &           predicate,
          const unsigned int    grainsize)
{
#ifndef DEAL_II_WITH_THREADS
  // make sure we don't get compiler
  // warnings about unused arguments
  (void)grainsize;

  // The loop variable walks the first *input* range and therefore must
  // have that range's iterator type, not the output iterator type.
  for (InputIterator1 in1 = begin_in1; in1 != end_in1;)
    *out++ = predicate(*in1++, *in2++);
#else
  using Iterators =
    std::tuple<InputIterator1, InputIterator2, OutputIterator>;
  using SyncIterators = SynchronousIterators<Iterators>;
  Iterators x_begin(begin_in1, in2, out);
  Iterators x_end(end_in1, InputIterator2(), OutputIterator());
  tbb::parallel_for(tbb::blocked_range<SyncIterators>(x_begin,
                                                      x_end,
                                                      grainsize),
                    internal::make_body(predicate),
                    tbb::auto_partitioner());
#endif
}
256 
257 
258 
/**
 * Apply the ternary @p predicate to corresponding elements of the range
 * <code>[begin_in1, end_in1)</code> and of the sequences starting at
 * @p in2 and @p in3, and store the results in consecutive positions
 * starting at @p out.
 *
 * Without DEAL_II_WITH_THREADS this is a plain sequential loop and
 * @p grainsize is ignored. With threads, the range is split into chunks by
 * tbb::parallel_for using a tbb::blocked_range with the given
 * @p grainsize; the end markers for the second and third input and for the
 * output iterator are default-constructed.
 *
 * @param begin_in1  First element of the first input range.
 * @param end_in1    One past the last element of the first input range.
 * @param in2        Start of the second input sequence.
 * @param in3        Start of the third input sequence.
 * @param out        Start of the output range.
 * @param predicate  Ternary function object applied to each input triple.
 * @param grainsize  Grain size handed to tbb::blocked_range.
 */
template <typename InputIterator1,
          typename InputIterator2,
          typename InputIterator3,
          typename OutputIterator,
          typename Predicate>
void
transform(const InputIterator1 &begin_in1,
          const InputIterator1 &end_in1,
          InputIterator2        in2,
          InputIterator3        in3,
          OutputIterator        out,
          Predicate &           predicate,
          const unsigned int    grainsize)
{
#ifndef DEAL_II_WITH_THREADS
  // make sure we don't get compiler
  // warnings about unused arguments
  (void)grainsize;

  // The loop variable walks the first *input* range and therefore must
  // have that range's iterator type, not the output iterator type.
  for (InputIterator1 in1 = begin_in1; in1 != end_in1;)
    *out++ = predicate(*in1++, *in2++, *in3++);
#else
  using Iterators = std::
    tuple<InputIterator1, InputIterator2, InputIterator3, OutputIterator>;
  using SyncIterators = SynchronousIterators<Iterators>;
  Iterators x_begin(begin_in1, in2, in3, out);
  Iterators x_end(end_in1,
                  InputIterator2(),
                  InputIterator3(),
                  OutputIterator());
  tbb::parallel_for(tbb::blocked_range<SyncIterators>(x_begin,
                                                      x_end,
                                                      grainsize),
                    internal::make_body(predicate),
                    tbb::auto_partitioner());
#endif
}
319 
320 
321  namespace internal
322  {
323 #ifdef DEAL_II_WITH_THREADS
324 
328  template <typename RangeType, typename Function>
329  void
330  apply_to_subranges(const tbb::blocked_range<RangeType> &range,
331  const Function & f)
332  {
333  f(range.begin(), range.end());
334  }
335 #endif
336  } // namespace internal
337 
338 
410  template <typename RangeType, typename Function>
411  void
412  apply_to_subranges(const RangeType & begin,
413  const typename identity<RangeType>::type &end,
414  const Function & f,
415  const unsigned int grainsize)
416  {
417 #ifndef DEAL_II_WITH_THREADS
418  // make sure we don't get compiler
419  // warnings about unused arguments
420  (void)grainsize;
421 
422 # ifndef DEAL_II_BIND_NO_CONST_OP_PARENTHESES
423  f(begin, end);
424 # else
425  // work around a problem with MS VC++ where there is no const
426  // operator() in 'Function' if 'Function' is the result of std::bind
427  Function ff = f;
428  ff(begin, end);
429 # endif
430 #else
431  tbb::parallel_for(
432  tbb::blocked_range<RangeType>(begin, end, grainsize),
433  std::bind(&internal::apply_to_subranges<RangeType, Function>,
434  std::placeholders::_1,
435  std::cref(f)),
436  tbb::auto_partitioner());
437 #endif
438  }
439 
440 
441 
/**
 * Abstract base class for work that is applied to a range of
 * <code>std::size_t</code> indices. Derived classes implement
 * apply_to_subrange(); apply_parallel() runs that function over a whole
 * range, possibly split into chunks.
 *
 * The <code>struct ParallelForInteger</code> header was missing from this
 * listing and is restored here. It has to be a struct (public members)
 * because apply_to_subrange() is called from outside the class.
 */
struct ParallelForInteger
{
  /**
   * Virtual destructor, so that derived objects can be destroyed through
   * a pointer or reference to this base class.
   */
  virtual ~ParallelForInteger() = default;

  /**
   * Run apply_to_subrange() over <code>[begin, end)</code>. In the
   * threaded build the range may be split into chunks, using
   * @p minimum_parallel_grain_size as grain size.
   */
  void
  apply_parallel(const std::size_t begin,
                 const std::size_t end,
                 const std::size_t minimum_parallel_grain_size) const;

  /**
   * The work to be done on one subrange, given by its two end points. To
   * be implemented by derived classes.
   */
  virtual void
  apply_to_subrange(const std::size_t, const std::size_t) const = 0;
};
499 
500 
501 
502  namespace internal
503  {
504 #ifdef DEAL_II_WITH_THREADS
505 
511  template <typename ResultType, typename Function>
513  {
517  ResultType result;
518 
528  template <typename Reductor>
530  const Reductor & reductor,
531  const ResultType neutral_element = ResultType())
532  : result(neutral_element)
533  , f(f)
534  , neutral_element(neutral_element)
535  , reductor(reductor)
536  {}
537 
542  : result(r.neutral_element)
543  , f(r.f)
544  , neutral_element(r.neutral_element)
545  , reductor(r.reductor)
546  {}
547 
552  void
554  {
555  result = reductor(result, r.result);
556  }
557 
561  template <typename RangeType>
562  void
563  operator()(const tbb::blocked_range<RangeType> &range)
564  {
565  result = reductor(result, f(range.begin(), range.end()));
566  }
567 
568  private:
572  const Function f;
573 
579  const ResultType neutral_element;
580 
585  const std::function<ResultType(ResultType, ResultType)> reductor;
586  };
587 #endif
588  } // namespace internal
589 
590 
650  template <typename ResultType, typename RangeType, typename Function>
651  ResultType
653  const RangeType & begin,
654  const typename identity<RangeType>::type &end,
655  const unsigned int grainsize)
656  {
657 #ifndef DEAL_II_WITH_THREADS
658  // make sure we don't get compiler
659  // warnings about unused arguments
660  (void)grainsize;
661 
662 # ifndef DEAL_II_BIND_NO_CONST_OP_PARENTHESES
663  return f(begin, end);
664 # else
665  // work around a problem with MS VC++ where there is no const
666  // operator() in 'Function' if 'Function' is the result of std::bind
667  Function ff = f;
668  return ff(begin, end);
669 # endif
670 #else
672  f, std::plus<ResultType>(), 0);
673  tbb::parallel_reduce(tbb::blocked_range<RangeType>(begin, end, grainsize),
674  reductor,
675  tbb::auto_partitioner());
676  return reductor.result;
677 #endif
678  }
679 
680 
681  // --------------------- for loop affinity partitioner -----------------------
682 
692  namespace internal
693  {
/**
 * Holder for a tbb::affinity_partitioner that can be handed out and given
 * back. In builds without DEAL_II_WITH_THREADS the class consists of the
 * constructor only.
 */
class TBBPartitioner
{
public:
  /**
   * Constructor. Defined outside this header.
   */
  TBBPartitioner();

#ifdef DEAL_II_WITH_THREADS

  /**
   * Destructor. Defined outside this header.
   */
  ~TBBPartitioner();

  /**
   * Hand out a shared pointer to an affinity partitioner.
   * NOTE(review): presumably returns the stored partitioner when it is not
   * in use and a fresh one otherwise -- confirm against the implementation.
   */
  std::shared_ptr<tbb::affinity_partitioner>
  acquire_one_partitioner();

  /**
   * Give back a partitioner previously obtained from
   * acquire_one_partitioner().
   */
  void
  release_one_partitioner(std::shared_ptr<tbb::affinity_partitioner> &p);

private:
  /**
   * The partitioner owned by this object.
   */
  std::shared_ptr<tbb::affinity_partitioner> my_partitioner;

  /**
   * NOTE(review): presumably records whether my_partitioner is currently
   * handed out -- confirm against the implementation.
   */
  bool in_use;

  /**
   * NOTE(review): presumably guards access to in_use -- confirm against
   * the implementation.
   */
  ::Threads::Mutex mutex;
#endif
};
745  } // namespace internal
746 } // namespace parallel
747 
748 
namespace internal
{
  namespace VectorImplementation
  {
    /**
     * Declared here, defined elsewhere.
     * NOTE(review): judging by the name, the minimum number of elements a
     * vector operation must cover before it is split for parallel
     * execution -- confirm against the definition.
     */
    extern unsigned int minimum_parallel_grain_size;
  } // namespace VectorImplementation


  namespace SparseMatrixImplementation
  {
    /**
     * Declared here, defined elsewhere.
     * NOTE(review): judging by the name, the sparse matrix counterpart of
     * the vector grain size above -- confirm against the definition.
     */
    extern unsigned int minimum_parallel_grain_size;
  } // namespace SparseMatrixImplementation

} // end of namespace internal
782 
783 
784 /* --------------------------- inline functions ------------------------- */
785 
786 namespace parallel
787 {
788 #ifdef DEAL_II_WITH_THREADS
789 
790  namespace internal
791  {
797  {
799  : worker_(worker)
800  {}
801 
802  void
803  operator()(const tbb::blocked_range<std::size_t> &range) const
804  {
805  worker_.apply_to_subrange(range.begin(), range.end());
806  }
807 
808  const parallel::ParallelForInteger &worker_;
809  };
810  } // namespace internal
811 
812 #endif
813 
814 
815  inline void
817  const std::size_t begin,
818  const std::size_t end,
819  const std::size_t minimum_parallel_grain_size) const
820  {
821 #ifndef DEAL_II_WITH_THREADS
822  // make sure we don't get compiler
823  // warnings about unused arguments
824  (void)minimum_parallel_grain_size;
825 
826  apply_to_subrange(begin, end);
827 #else
828  internal::ParallelForWrapper worker(*this);
829  tbb::parallel_for(
830  tbb::blocked_range<std::size_t>(begin, end, minimum_parallel_grain_size),
831  worker,
832  tbb::auto_partitioner());
833 #endif
834  }
835 
836 } // end of namespace parallel
837 
838 DEAL_II_NAMESPACE_CLOSE
839 
840 #endif
Body< F > make_body(const F &f)
Definition: parallel.h:142
void apply_parallel(const std::size_t begin, const std::size_t end, const std::size_t minimum_parallel_grain_size) const
Definition: parallel.h:816
void join(const ReductionOnSubranges &r)
Definition: parallel.h:553
static void apply(const F &f, const std::tuple< I1, I2, I3 > &p)
Definition: parallel.h:117
void operator()(const tbb::blocked_range< RangeType > &range)
Definition: parallel.h:563
ReductionOnSubranges(const Function &f, const Reductor &reductor, const ResultType neutral_element=ResultType())
Definition: parallel.h:529
static void apply(const F &f, const std::tuple< I1, I2, I3, I4 > &p)
Definition: parallel.h:127
void apply_to_subranges(const tbb::blocked_range< RangeType > &range, const Function &f)
Definition: parallel.h:330
ReductionOnSubranges(const ReductionOnSubranges &r, tbb::split)
Definition: parallel.h:541
const std::function< ResultType(ResultType, ResultType)> reductor
Definition: parallel.h:585
static void apply(const F &f, const std::tuple< I1, I2 > &p)
Definition: parallel.h:107
ResultType accumulate_from_subranges(const Function &f, const RangeType &begin, const typename identity< RangeType >::type &end, const unsigned int grainsize)
Definition: parallel.h:652
void transform(const InputIterator &begin_in, const InputIterator &end_in, OutputIterator out, Predicate &predicate, const unsigned int grainsize)
Definition: parallel.h:173