16 #ifndef dealii_cuda_fe_evaluation_h 17 #define dealii_cuda_fe_evaluation_h 19 #include <deal.II/base/tensor.h> 20 #include <deal.II/base/utilities.h> 22 #include <deal.II/lac/cuda_vector.h> 24 #include <deal.II/matrix_free/cuda_matrix_free.h> 25 #include <deal.II/matrix_free/cuda_matrix_free.templates.h> 26 #include <deal.II/matrix_free/cuda_tensor_product_kernels.h> 28 DEAL_II_NAMESPACE_OPEN
37 template <
int dim,
int fe_degree,
bool transpose,
typename Number>
39 resolve_hanging_nodes_shmem(Number *values,
const unsigned int constr)
76 int n_q_points_1d = fe_degree + 1,
77 int n_components_ = 1,
78 typename Number =
double>
82 using value_type = Number;
85 static constexpr
unsigned int dimension = dim;
86 static constexpr
unsigned int n_components = n_components_;
87 static constexpr
unsigned int n_q_points =
89 static constexpr
unsigned int tensor_dofs_per_cell =
97 const data_type * data,
98 SharedData<dim, Number> *shdata);
109 read_dof_values(
const Number *src);
118 distribute_local_to_global(Number *dst)
const;
128 evaluate(
const bool evaluate_val,
const bool evaluate_grad);
138 integrate(
const bool integrate_val,
const bool integrate_grad);
144 __device__ value_type
145 get_value(
const unsigned int q_point)
const;
154 submit_value(
const value_type &val_in,
const unsigned int q_point);
161 get_gradient(
const unsigned int q_point)
const;
168 submit_gradient(
const gradient_type &grad_in,
const unsigned int q_point);
173 template <
typename functor>
175 apply_quad_point_operations(
const functor &func);
178 unsigned int *local_to_global;
179 unsigned int n_cells;
180 unsigned int padding_length;
182 const unsigned int constraint_mask;
189 Number *gradients[dim];
202 const data_type * data,
203 SharedData<dim, Number> *shdata)
204 : n_cells(data->n_cells)
205 , padding_length(data->padding_length)
206 , constraint_mask(data->constraint_mask[cell_id])
207 , values(shdata->values)
209 local_to_global = data->local_to_global + padding_length * cell_id;
210 inv_jac = data->inv_jacobian + padding_length * cell_id;
211 JxW = data->JxW + padding_length * cell_id;
213 for (
unsigned int i = 0; i < dim; ++i)
214 gradients[i] = shdata->gradients[i];
228 static_assert(n_components_ == 1,
"This function only supports FE with one \ 230 const unsigned int idx =
231 (threadIdx.x % n_q_points_1d) +
232 (dim > 1 ? threadIdx.y : 0) * n_q_points_1d +
233 (dim > 2 ? threadIdx.z : 0) * n_q_points_1d * n_q_points_1d;
235 const unsigned int src_idx = local_to_global[idx];
237 values[idx] = __ldg(&src[src_idx]);
240 internal::resolve_hanging_nodes_shmem<dim, fe_degree, false>(
241 values, constraint_mask);
257 static_assert(n_components_ == 1,
"This function only supports FE with one \ 260 internal::resolve_hanging_nodes_shmem<dim, fe_degree, true>(
261 values, constraint_mask);
264 const unsigned int idx =
265 (threadIdx.x % n_q_points_1d) +
266 (dim > 1 ? threadIdx.y : 0) * n_q_points_1d +
267 (dim > 2 ? threadIdx.z : 0) * n_q_points_1d * n_q_points_1d;
268 const unsigned int destination_idx = local_to_global[idx];
270 dst[destination_idx] += values[idx];
282 const bool evaluate_val,
283 const bool evaluate_grad)
288 internal::EvaluatorVariant::evaluate_general,
293 evaluator_tensor_product;
294 if (evaluate_grad ==
true)
296 evaluator_tensor_product.gradient_at_quad_pts(values, gradients);
300 if (evaluate_val ==
true)
302 evaluator_tensor_product.value_at_quad_pts(values);
316 const bool integrate_val,
317 const bool integrate_grad)
320 internal::EvaluatorVariant::evaluate_general,
325 evaluator_tensor_product;
326 if (integrate_val ==
true)
328 evaluator_tensor_product.integrate_value(values);
330 if (integrate_grad ==
true)
332 evaluator_tensor_product.integrate_gradient<
true>(values,
337 else if (integrate_grad ==
true)
339 evaluator_tensor_product.integrate_gradient<
false>(values, gradients);
357 const unsigned int q_point)
const 359 return values[q_point];
373 values[q_point] = val_in * JxW[q_point];
391 static_assert(n_components_ == 1,
"This function only supports FE with one \ 394 const Number *inv_jacobian = &inv_jac[q_point];
396 for (
int d_1 = 0; d_1 < dim; ++d_1)
399 for (
int d_2 = 0; d_2 < dim; ++d_2)
400 tmp += inv_jacobian[padding_length * n_cells * (dim * d_2 + d_1)] *
401 gradients[d_2][q_point];
420 const Number *inv_jacobian = &inv_jac[q_point];
421 for (
int d_1 = 0; d_1 < dim; ++d_1)
424 for (
int d_2 = 0; d_2 < dim; ++d_2)
425 tmp += inv_jacobian[n_cells * padding_length * (dim * d_1 + d_2)] *
427 gradients[d_1][q_point] = tmp * JxW[q_point];
438 template <
typename functor>
443 const unsigned int q_point =
445 threadIdx.x % n_q_points_1d :
447 threadIdx.x % n_q_points_1d + n_q_points_1d * threadIdx.y :
448 threadIdx.x % n_q_points_1d +
449 n_q_points_1d * (threadIdx.y + n_q_points_1d * threadIdx.z));
456 DEAL_II_NAMESPACE_CLOSE
void submit_value(const value_type &val_in, const unsigned int q_point)
constexpr unsigned int pow(const unsigned int base, const unsigned int iexp)
void evaluate(const bool evaluate_val, const bool evaluate_grad)
FEEvaluation(int cell_id, const data_type *data, SharedData< dim, Number > *shdata)
void apply_quad_point_operations(const functor &func)
gradient_type get_gradient(const unsigned int q_point) const
value_type get_value(const unsigned int q_point) const
void read_dof_values(const Number *src)
void submit_gradient(const gradient_type &grad_in, const unsigned int q_point)
void distribute_local_to_global(Number *dst) const
void integrate(const bool integrate_val, const bool integrate_grad)