Reference documentation for deal.II version 9.1.0-pre
cuda_matrix_free.h
1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2016 - 2018 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 // ---------------------------------------------------------------------
15 
16 
17 #ifndef dealii_cuda_matrix_free_h
18 #define dealii_cuda_matrix_free_h
19 
20 #include <deal.II/base/config.h>
21 
22 #ifdef DEAL_II_WITH_CUDA
23 
24 # include <deal.II/base/quadrature.h>
25 # include <deal.II/base/tensor.h>
26 
27 # include <deal.II/dofs/dof_handler.h>
28 
29 # include <deal.II/fe/fe_update_flags.h>
30 # include <deal.II/fe/mapping.h>
31 # include <deal.II/fe/mapping_q1.h>
32 
33 # include <deal.II/lac/affine_constraints.h>
34 # include <deal.II/lac/cuda_vector.h>
35 
36 # include <cuda_runtime_api.h>
37 
38 
39 DEAL_II_NAMESPACE_OPEN
40 
41 namespace CUDAWrappers
42 {
43  // forward declaration
44  namespace internal
45  {
46  template <int dim, typename Number>
47  class ReinitHelper;
48  }
49 
76  template <int dim, typename Number = double>
77  class MatrixFree : public Subscriptor
78  {
79  public:
81  // TODO this should really be a CUDAWrappers::Point
83 
84  // Use Number2 so we don't hide the template parameter Number
85  template <typename Number2>
87 
94  {
95  parallel_in_elem,
96  parallel_over_elem
97  };
98 
99  struct AdditionalData
100  {
101  AdditionalData(
102  const ParallelizationScheme parallelization_scheme = parallel_in_elem,
103  const UpdateFlags mapping_update_flags = update_gradients |
105  : parallelization_scheme(parallelization_scheme)
106  , mapping_update_flags(mapping_update_flags)
107  {}
108 
112  unsigned int n_colors;
117  ParallelizationScheme parallelization_scheme;
127  UpdateFlags mapping_update_flags;
128  };
129 
134  struct Data
135  {
136  point_type * q_points;
137  unsigned int *local_to_global;
138  Number * inv_jacobian;
139  Number * JxW;
140  unsigned int n_cells;
141  unsigned int padding_length;
142  unsigned int row_start;
143  unsigned int *constraint_mask;
144  };
145 
149  MatrixFree();
150 
151  unsigned int
152  get_padding_length() const;
153 
162  void
163  reinit(const Mapping<dim> & mapping,
164  const DoFHandler<dim> & dof_handler,
165  const AffineConstraints<Number> &constraints,
166  const Quadrature<1> & quad,
167  const AdditionalData additional_data = AdditionalData());
168 
172  void
173  reinit(const DoFHandler<dim> & dof_handler,
174  const AffineConstraints<Number> &constraints,
175  const Quadrature<1> & quad,
176  const AdditionalData AdditionalData = AdditionalData());
177 
181  Data
182  get_data(unsigned int color) const;
183 
188  template <typename functor>
189  void
190  cell_loop(const functor & func,
191  const CUDAVector<Number> &src,
192  CUDAVector<Number> & dst) const;
193 
194  void
195  copy_constrained_values(const CUDAVector<Number> &src,
196  CUDAVector<Number> & dst) const;
197 
198  void
199  set_constrained_values(const Number val, CUDAVector<Number> &dst) const;
200 
204  void
205  free();
206 
210  std::size_t
211  memory_consumption() const;
212 
213  private:
222  unsigned int fe_degree;
226  unsigned int dofs_per_cell;
230  unsigned int n_constrained_dofs;
234  unsigned int q_points_per_cell;
238  unsigned int n_colors;
242  std::vector<unsigned int> n_cells;
247  std::vector<point_type *> q_points;
252  std::vector<unsigned int *> local_to_global;
257  std::vector<Number *> inv_jacobian;
262  std::vector<Number *> JxW;
263 
264  // Constraints
265  unsigned int * constrained_dofs;
266  std::vector<unsigned int *> constraint_mask;
271  std::vector<dim3> grid_dim;
276  std::vector<dim3> block_dim;
277 
278  // Parallelization parameter
279  unsigned int cells_per_block;
280  dim3 constraint_grid_dim;
281  dim3 constraint_block_dim;
282 
283  unsigned int padding_length;
284  std::vector<unsigned int> row_start;
285 
286  friend class internal::ReinitHelper<dim, Number>;
287  };
288 
289 
290 
291  // TODO find a better place to put these things
292  // Structure to pass the shared memory into a general user function.
293  template <int dim, typename Number>
294  struct SharedData
295  {
296  __device__
297  SharedData(Number *vd, Number *gq[dim])
298  : values(vd)
299  {
300  for (int d = 0; d < dim; ++d)
301  gradients[d] = gq[d];
302  }
303 
304  Number *values;
305  Number *gradients[dim];
306  };
307 
308 
309 
310  // This function determines the number of cells per block, possibly at compile
311  // time (by virtue of being 'constexpr')
312  // TODO this function should be rewritten using meta-programming
313  __host__ __device__ constexpr unsigned int
314  cells_per_block_shmem(int dim, int fe_degree)
315  {
316  /* clang-format off */
317  return dim==2 ? (fe_degree==1 ? 32 :
318  fe_degree==2 ? 8 :
319  fe_degree==3 ? 4 :
320  fe_degree==4 ? 4 :
321  1) :
322  dim==3 ? (fe_degree==1 ? 8 :
323  fe_degree==2 ? 2 :
324  1) : 1;
325  /* clang-format on */
326  }
327 } // namespace CUDAWrappers
328 
329 DEAL_II_NAMESPACE_CLOSE
330 
331 #endif
332 
333 #endif
Transformed quadrature weights.
ParallelizationScheme parallelization_scheme
std::vector< unsigned int * > local_to_global
std::vector< point_type * > q_points
UpdateFlags
std::vector< Number * > inv_jacobian
std::vector< dim3 > block_dim
std::vector< dim3 > grid_dim
Definition: mpi.h:55
Shape function gradients.
std::vector< Number * > JxW
std::vector< unsigned int > n_cells