17 #ifndef dealii_cuda_tensor_product_kernels_h 18 #define dealii_cuda_tensor_product_kernels_h 20 #include <deal.II/base/config.h> 23 DEAL_II_NAMESPACE_OPEN
// Member declarations of a tensor-product evaluator class template. The line
// that names the enclosing class is not visible in this excerpt.
// Template parameters: spatial dimension, polynomial degree, number of 1d
// quadrature points (n_q_points_1d) and the scalar type Number.
67 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
// Compile-time constant; the initializer is on a line not visible here.
// Presumably the number of degrees of freedom per cell -- TODO confirm.
74 static constexpr
unsigned int dofs_per_cell =
// Compile-time constant; the initializer is on a line not visible here.
// Presumably the number of quadrature points per cell -- TODO confirm.
76 static constexpr
unsigned int n_q_points =
// values(): forwards its four template flags to apply() together with
// global_shape_values as the shape data. The flags are:
//   direction   - coordinate direction (0, 1 or 2) along which to contract
//   dof_to_quad - selects which of the two index orders is used to read the
//                 shape data
//   add         - presumably accumulate into `out` instead of overwriting
//                 (the selecting condition is not visible) -- TODO confirm
//   in_place    - read the input from `out` instead of `in`
86 template <
int direction,
bool dof_to_quad,
bool add,
bool in_place>
88 values(
const Number *in, Number *out)
const;
// gradients(): same flags as values(), but forwards global_shape_gradients
// to apply() as the shape data.
94 template <
int direction,
bool dof_to_quad,
bool add,
bool in_place>
96 gradients(
const Number *in, Number *out)
const;
// apply(): for the entry selected by the calling thread's threadIdx, sums
// shape_data[...] * input[...] over n_q_points_1d entries along `direction`
// and stores the sum in `out`.
101 template <
int direction,
bool dof_to_quad,
bool add,
bool in_place>
103 apply(Number shape_data[],
const Number *in, Number *out)
const;
// Applies values<d, true, false, true> to u in place for successive
// directions d.
109 value_at_quad_pts(Number *u);
// Applies values<d, false, false, true> to u in place for successive
// directions d.
115 integrate_value(Number *u);
// Fills grad_u[c] from u: gradients<> is applied along direction c and
// values<> along the other directions.
122 gradient_at_quad_pts(
const Number *
const u, Number *grad_u[dim]);
// Counterpart of gradient_at_quad_pts with dof_to_quad = false: combines the
// entries of grad_u into u. grad_u is also used as scratch space for the
// intermediate in-place passes.
130 integrate_gradient(Number *u, Number *grad_u[dim]);
135 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
// Out-of-class definition of values(): a thin wrapper that calls apply() with
// the same four template flags and global_shape_values as the shape data.
// The remaining call arguments are on lines not visible in this excerpt
// (presumably `in` and `out` -- TODO confirm).
146 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
147 template <
int direction,
bool dof_to_quad,
bool add,
bool in_place>
153 Number>::values(
const Number *in, Number *out)
const 155 apply<direction, dof_to_quad, add, in_place>(global_shape_values,
// Out-of-class definition of gradients(): a thin wrapper that calls apply()
// with the same four template flags and global_shape_gradients as the shape
// data. The remaining call arguments are on lines not visible in this excerpt
// (presumably `in` and `out` -- TODO confirm).
162 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
163 template <
int direction,
bool dof_to_quad,
bool add,
bool in_place>
169 Number>::gradients(
const Number *in,
172 apply<direction, dof_to_quad, add, in_place>(global_shape_gradients,
// Out-of-class definition of apply(): each thread computes one output entry
// as a sum over k of shape_data[shape_idx] * input[source_idx], where k runs
// along the coordinate `direction`.
179 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
180 template <
int direction,
bool dof_to_quad,
bool add,
bool in_place>
186 Number>::apply(Number shape_data[],
// Per-thread indices taken from threadIdx. i and j stay fixed during the sum;
// q takes the place of the summation index k in the output position.
// dim == 1: i = 0, j = 0, q = threadIdx.x % n_q_points_1d
// dim == 2: i = threadIdx.x % n_q_points_1d, j = 0, q = threadIdx.y
// dim == 3: i = threadIdx.x % n_q_points_1d, j = threadIdx.y, q = threadIdx.z
190 const unsigned int i = (dim == 1) ? 0 : threadIdx.x % n_q_points_1d;
191 const unsigned int j = (dim == 3) ? threadIdx.y : 0;
192 const unsigned int q = (dim == 1) ?
193 (threadIdx.x % n_q_points_1d) :
194 (dim == 2) ? threadIdx.y : threadIdx.z;
// Accumulate into t (its declaration is on a line not visible here).
199 for (
int k = 0; k < n_q_points_1d; ++k)
// dof_to_quad swaps the roles of q and k when indexing shape_data, i.e. it
// reads the same n_q_points_1d x n_q_points_1d table in transposed order.
201 const unsigned int shape_idx =
202 dof_to_quad ? (q + k * n_q_points_1d) : (k + q * n_q_points_1d);
// Flattened input index with k placed in the slot of `direction`. The first
// selector of this conditional chain is on a line not visible here
// (presumably `direction == 0` -- TODO confirm).
203 const unsigned int source_idx =
205 (k + n_q_points_1d * (i + n_q_points_1d * j)) :
206 (direction == 1) ? (i + n_q_points_1d * (k + n_q_points_1d * j)) :
207 (i + n_q_points_1d * (j + n_q_points_1d * k));
// With in_place the input is read from `out` rather than from `in`.
208 t += shape_data[shape_idx] *
209 (in_place ? out[source_idx] : in[source_idx]);
// NOTE(review): with in_place, every thread reads `out` in the loop above and
// `out` is written below. Any barrier separating the two phases would be on
// lines not visible in this excerpt -- confirm.
// Flattened output index: same layout as source_idx with q in place of k.
215 const unsigned int destination_idx =
217 (q + n_q_points_1d * (i + n_q_points_1d * j)) :
218 (direction == 1) ? (i + n_q_points_1d * (q + n_q_points_1d * j)) :
219 (i + n_q_points_1d * (j + n_q_points_1d * q));
// The two stores below are alternatives (accumulate vs. overwrite). The
// condition choosing between them is on lines not visible here (presumably
// the `add` template flag -- TODO confirm).
222 out[destination_idx] += t;
224 out[destination_idx] = t;
// Device function that transforms u in place by calling
// values<d, true, false, true>(u, u) for successive directions d, i.e.
// dof_to_quad = true, add = false, in_place = true.
229 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
230 inline __device__
void 235 Number>::value_at_quad_pts(Number *u)
// The calls below form three groups using directions {0}, {0, 1} and
// {0, 1, 2}. The control flow selecting a group, and anything between the
// calls, is on lines not visible here (presumably one group per value of
// dim -- TODO confirm).
241 values<0, true, false, true>(u, u);
247 values<0, true, false, true>(u, u);
249 values<1, true, false, true>(u, u);
255 values<0, true, false, true>(u, u);
257 values<1, true, false, true>(u, u);
259 values<2, true, false, true>(u, u);
// Device function that transforms u in place by calling
// values<d, false, false, true>(u, u) for successive directions d, i.e.
// dof_to_quad = false, add = false, in_place = true.
273 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
274 inline __device__
void 279 Number>::integrate_value(Number *u)
// The calls below form three groups using directions {0}, {0, 1} and
// {0, 1, 2}. The control flow selecting a group, and anything between the
// calls, is on lines not visible here (presumably one group per value of
// dim -- TODO confirm).
285 values<0, false, false, true>(u, u);
291 values<0, false, false, true>(u, u);
293 values<1, false, false, true>(u, u);
299 values<0, false, false, true>(u, u);
301 values<1, false, false, true>(u, u);
303 values<2, false, false, true>(u, u);
// Device function that fills the entries of grad_u from u with
// dof_to_quad = true. In every group of calls below, grad_u[c] receives
// gradients<> along direction c and values<> along every other direction.
// The first pass (direction 0) reads u out of place; later passes work in
// place on grad_u[c].
317 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
318 inline __device__
void 323 Number>::gradient_at_quad_pts(
const Number *
const u,
// The control flow selecting one of the three groups below, and anything
// between the passes, is on lines not visible here (presumably one group per
// value of dim -- TODO confirm).
// Group with one component: only grad_u[0] is written.
330 gradients<0, true, false, false>(u, grad_u[0]);
// Group with two components: direction-0 pass out of place from u ...
336 gradients<0, true, false, false>(u, grad_u[0]);
337 values<0, true, false, false>(u, grad_u[1]);
// ... then the direction-1 pass in place on each component.
341 values<1, true, false, true>(grad_u[0], grad_u[0]);
342 gradients<1, true, false, true>(grad_u[1], grad_u[1]);
// Group with three components: direction-0 pass out of place from u ...
348 gradients<0, true, false, false>(u, grad_u[0]);
349 values<0, true, false, false>(u, grad_u[1]);
350 values<0, true, false, false>(u, grad_u[2]);
// ... direction-1 pass in place ...
354 values<1, true, false, true>(grad_u[0], grad_u[0]);
355 gradients<1, true, false, true>(grad_u[1], grad_u[1]);
356 values<1, true, false, true>(grad_u[2], grad_u[2]);
// ... direction-2 pass in place.
360 values<2, true, false, true>(grad_u[0], grad_u[0]);
361 values<2, true, false, true>(grad_u[1], grad_u[1]);
362 gradients<2, true, false, true>(grad_u[2], grad_u[2]);
// Device function that combines the entries of grad_u into u with
// dof_to_quad = false. For each component c, gradients<> is applied along
// direction c and values<> along the other directions. All passes except the
// last work in place on grad_u[c]; the last pass writes to u.
// `add` is used below as a template argument; its declaration is on a line
// not visible in this excerpt.
376 template <
int dim,
int fe_degree,
int n_q_po
ints_1d,
typename Number>
378 inline __device__
void 383 Number>::integrate_gradient(Number *u,
// The control flow selecting one of the three groups below, and anything
// between the passes, is on lines not visible here (presumably one group per
// value of dim -- TODO confirm).
// Group with one component. grad_u is declared as `Number *grad_u[dim]`, so
// its valid indices are 0 .. dim-1; index `dim` would be one past the last
// component pointer. The single component lives at index 0.
390 gradients<0, false, add, false>(grad_u[0], u);
// Group with two components: direction-0 pass in place on each component ...
396 gradients<0, false, false, true>(grad_u[0], grad_u[0]);
397 values<0, false, false, true>(grad_u[1], grad_u[1]);
// ... then the direction-1 pass into u. The first call honours `add`; the
// second always passes add = true so it accumulates onto the first.
401 values<1, false, add, false>(grad_u[0], u);
403 gradients<1, false, true, false>(grad_u[1], u);
// Group with three components: direction-0 pass in place ...
409 gradients<0, false, false, true>(grad_u[0], grad_u[0]);
410 values<0, false, false, true>(grad_u[1], grad_u[1]);
411 values<0, false, false, true>(grad_u[2], grad_u[2]);
// ... direction-1 pass in place ...
415 values<1, false, false, true>(grad_u[0], grad_u[0]);
416 gradients<1, false, false, true>(grad_u[1], grad_u[1]);
417 values<1, false, false, true>(grad_u[2], grad_u[2]);
// ... direction-2 pass into u. The first call honours `add`; the following
// ones always pass add = true so they accumulate onto it.
421 values<2, false, add, false>(grad_u[0], u);
423 values<2, false, true, false>(grad_u[1], u);
425 gradients<2, false, true, false>(grad_u[2], u);
439 DEAL_II_NAMESPACE_CLOSE
constexpr unsigned int pow(const unsigned int base, const unsigned int iexp)