Reference documentation for deal.II version 9.1.0-pre
tensor_product_kernels.h
1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2017 - 2018-2018 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 // ---------------------------------------------------------------------
15 
16 
17 #ifndef dealii_matrix_free_tensor_product_kernels_h
18 #define dealii_matrix_free_tensor_product_kernels_h
19 
20 #include <deal.II/base/config.h>
21 
22 #include <deal.II/base/aligned_vector.h>
23 #include <deal.II/base/utilities.h>
24 
25 
26 DEAL_II_NAMESPACE_OPEN
27 
28 
29 
30 namespace internal
31 {
37  {
66  };
67 
68 
69 
90  template <EvaluatorVariant variant,
91  int dim,
92  int n_rows,
93  int n_columns,
94  typename Number,
95  typename Number2 = Number>
97  {};
98 
99 
100 
118  template <int dim,
119  int n_rows,
120  int n_columns,
121  typename Number,
122  typename Number2>
124  dim,
125  n_rows,
126  n_columns,
127  Number,
128  Number2>
129  {
130  static constexpr unsigned int n_rows_of_product =
131  Utilities::pow(n_rows, dim);
132  static constexpr unsigned int n_columns_of_product =
133  Utilities::pow(n_columns, dim);
134 
140  : shape_values(nullptr)
141  , shape_gradients(nullptr)
142  , shape_hessians(nullptr)
143  {}
144 
149  const AlignedVector<Number2> &shape_gradients,
150  const AlignedVector<Number2> &shape_hessians,
151  const unsigned int dummy1 = 0,
152  const unsigned int dummy2 = 0)
153  : shape_values(shape_values.begin())
154  , shape_gradients(shape_gradients.begin())
155  , shape_hessians(shape_hessians.begin())
156  {
157  // We can enter this function either for the apply() path that has
158  // n_rows * n_columns entries or for the apply_face() path that only has
159  // n_rows * 3 entries in the array. Since we cannot decide about the use
160  // we must allow for both here.
161  Assert(shape_values.size() == 0 ||
162  shape_values.size() == n_rows * n_columns ||
163  shape_values.size() == 3 * n_rows,
164  ExcDimensionMismatch(shape_values.size(), n_rows * n_columns));
165  Assert(shape_gradients.size() == 0 ||
166  shape_gradients.size() == n_rows * n_columns,
167  ExcDimensionMismatch(shape_gradients.size(), n_rows * n_columns));
168  Assert(shape_hessians.size() == 0 ||
169  shape_hessians.size() == n_rows * n_columns,
170  ExcDimensionMismatch(shape_hessians.size(), n_rows * n_columns));
171  (void)dummy1;
172  (void)dummy2;
173  }
174 
175  template <int direction, bool contract_over_rows, bool add>
176  void
177  values(const Number in[], Number out[]) const
178  {
179  apply<direction, contract_over_rows, add>(shape_values, in, out);
180  }
181 
182  template <int direction, bool contract_over_rows, bool add>
183  void
184  gradients(const Number in[], Number out[]) const
185  {
186  apply<direction, contract_over_rows, add>(shape_gradients, in, out);
187  }
188 
189  template <int direction, bool contract_over_rows, bool add>
190  void
191  hessians(const Number in[], Number out[]) const
192  {
193  apply<direction, contract_over_rows, add>(shape_hessians, in, out);
194  }
195 
196  template <int direction, bool contract_over_rows, bool add>
197  void
198  values_one_line(const Number in[], Number out[]) const
199  {
200  Assert(shape_values != nullptr, ExcNotInitialized());
201  apply<direction, contract_over_rows, add, true>(shape_values, in, out);
202  }
203 
204  template <int direction, bool contract_over_rows, bool add>
205  void
206  gradients_one_line(const Number in[], Number out[]) const
207  {
208  Assert(shape_gradients != nullptr, ExcNotInitialized());
209  apply<direction, contract_over_rows, add, true>(shape_gradients, in, out);
210  }
211 
212  template <int direction, bool contract_over_rows, bool add>
213  void
214  hessians_one_line(const Number in[], Number out[]) const
215  {
216  Assert(shape_hessians != nullptr, ExcNotInitialized());
217  apply<direction, contract_over_rows, add, true>(shape_hessians, in, out);
218  }
219 
244  template <int direction,
245  bool contract_over_rows,
246  bool add,
247  bool one_line = false>
248  static void
249  apply(const Number2 *DEAL_II_RESTRICT shape_data,
250  const Number * in,
251  Number * out);
252 
282  template <int face_direction,
283  bool contract_onto_face,
284  bool add,
285  int max_derivative>
286  void
287  apply_face(const Number *DEAL_II_RESTRICT in,
288  Number *DEAL_II_RESTRICT out) const;
289 
290  const Number2 *shape_values;
291  const Number2 *shape_gradients;
292  const Number2 *shape_hessians;
293  };
294 
295 
296 
297  template <int dim,
298  int n_rows,
299  int n_columns,
300  typename Number,
301  typename Number2>
302  template <int direction, bool contract_over_rows, bool add, bool one_line>
303  inline void
305  dim,
306  n_rows,
307  n_columns,
308  Number,
309  Number2>::apply(const Number2 *DEAL_II_RESTRICT
310  shape_data,
311  const Number * in,
312  Number * out)
313  {
314  static_assert(one_line == false || direction == dim - 1,
315  "Single-line evaluation only works for direction=dim-1.");
316  Assert(shape_data != nullptr,
317  ExcMessage(
318  "The given array shape_data must not be the null pointer!"));
319  Assert(dim == direction + 1 || one_line == true || n_rows == n_columns ||
320  in != out,
321  ExcMessage("In-place operation only supported for "
322  "n_rows==n_columns or single-line interpolation"));
323  AssertIndexRange(direction, dim);
324  constexpr int mm = contract_over_rows ? n_rows : n_columns,
325  nn = contract_over_rows ? n_columns : n_rows;
326 
327  constexpr int stride = Utilities::pow(n_columns, direction);
328  constexpr int n_blocks1 = one_line ? 1 : stride;
329  constexpr int n_blocks2 =
330  Utilities::pow(n_rows, (direction >= dim) ? 0 : (dim - direction - 1));
331 
332  for (int i2 = 0; i2 < n_blocks2; ++i2)
333  {
334  for (int i1 = 0; i1 < n_blocks1; ++i1)
335  {
336  Number x[mm];
337  for (int i = 0; i < mm; ++i)
338  x[i] = in[stride * i];
339  for (int col = 0; col < nn; ++col)
340  {
341  Number2 val0;
342  if (contract_over_rows == true)
343  val0 = shape_data[col];
344  else
345  val0 = shape_data[col * n_columns];
346  Number res0 = val0 * x[0];
347  for (int i = 1; i < mm; ++i)
348  {
349  if (contract_over_rows == true)
350  val0 = shape_data[i * n_columns + col];
351  else
352  val0 = shape_data[col * n_columns + i];
353  res0 += val0 * x[i];
354  }
355  if (add == false)
356  out[stride * col] = res0;
357  else
358  out[stride * col] += res0;
359  }
360 
361  if (one_line == false)
362  {
363  ++in;
364  ++out;
365  }
366  }
367  if (one_line == false)
368  {
369  in += stride * (mm - 1);
370  out += stride * (nn - 1);
371  }
372  }
373  }
374 
375 
376 
377  template <int dim,
378  int n_rows,
379  int n_columns,
380  typename Number,
381  typename Number2>
382  template <int face_direction,
383  bool contract_onto_face,
384  bool add,
385  int max_derivative>
386  inline void
388  dim,
389  n_rows,
390  n_columns,
391  Number,
392  Number2>::apply_face(const Number *DEAL_II_RESTRICT in,
393  Number *DEAL_II_RESTRICT
394  out) const
395  {
396  static_assert(dim > 0 && dim < 4, "Only dim=1,2,3 supported");
397  static_assert(max_derivative >= 0 && max_derivative < 3,
398  "Only derivative orders 0-2 implemented");
399  Assert(shape_values != nullptr,
400  ExcMessage(
401  "The given array shape_values must not be the null pointer."));
402 
403  constexpr int n_blocks1 = dim > 1 ? n_rows : 1;
404  constexpr int n_blocks2 = dim > 2 ? n_rows : 1;
405 
406  AssertIndexRange(face_direction, dim);
407  constexpr int stride = Utilities::pow(n_rows, face_direction);
408  constexpr int out_stride = Utilities::pow(n_rows, dim - 1);
409  const Number *DEAL_II_RESTRICT shape_values = this->shape_values;
410 
411  for (int i2 = 0; i2 < n_blocks2; ++i2)
412  {
413  for (int i1 = 0; i1 < n_blocks1; ++i1)
414  {
415  if (contract_onto_face == true)
416  {
417  Number res0 = shape_values[0] * in[0];
418  Number res1, res2;
419  if (max_derivative > 0)
420  res1 = shape_values[n_rows] * in[0];
421  if (max_derivative > 1)
422  res2 = shape_values[2 * n_rows] * in[0];
423  for (int ind = 1; ind < n_rows; ++ind)
424  {
425  res0 += shape_values[ind] * in[stride * ind];
426  if (max_derivative > 0)
427  res1 += shape_values[ind + n_rows] * in[stride * ind];
428  if (max_derivative > 1)
429  res2 += shape_values[ind + 2 * n_rows] * in[stride * ind];
430  }
431  if (add == false)
432  {
433  out[0] = res0;
434  if (max_derivative > 0)
435  out[out_stride] = res1;
436  if (max_derivative > 1)
437  out[2 * out_stride] = res2;
438  }
439  else
440  {
441  out[0] += res0;
442  if (max_derivative > 0)
443  out[out_stride] += res1;
444  if (max_derivative > 1)
445  out[2 * out_stride] += res2;
446  }
447  }
448  else
449  {
450  for (int col = 0; col < n_rows; ++col)
451  {
452  if (add == false)
453  out[col * stride] = shape_values[col] * in[0];
454  else
455  out[col * stride] += shape_values[col] * in[0];
456  if (max_derivative > 0)
457  out[col * stride] +=
458  shape_values[col + n_rows] * in[out_stride];
459  if (max_derivative > 1)
460  out[col * stride] +=
461  shape_values[col + 2 * n_rows] * in[2 * out_stride];
462  }
463  }
464 
465  // increment: in regular case, just go to the next point in
466  // x-direction. If we are at the end of one chunk in x-dir, need
467  // to jump over to the next layer in z-direction
468  switch (face_direction)
469  {
470  case 0:
471  in += contract_onto_face ? n_rows : 1;
472  out += contract_onto_face ? 1 : n_rows;
473  break;
474  case 1:
475  ++in;
476  ++out;
477  // faces 2 and 3 in 3D use local coordinate system zx, which
478  // is the other way around compared to the tensor
479  // product. Need to take that into account.
480  if (dim == 3)
481  {
482  if (contract_onto_face)
483  out += n_rows - 1;
484  else
485  in += n_rows - 1;
486  }
487  break;
488  case 2:
489  ++in;
490  ++out;
491  break;
492  default:
493  Assert(false, ExcNotImplemented());
494  }
495  }
496  if (face_direction == 1 && dim == 3)
497  {
498  // adjust for local coordinate system zx
499  if (contract_onto_face)
500  {
501  in += n_rows * (n_rows - 1);
502  out -= n_rows * n_rows - 1;
503  }
504  else
505  {
506  out += n_rows * (n_rows - 1);
507  in -= n_rows * n_rows - 1;
508  }
509  }
510  }
511  }
512 
513 
514 
528  template <int dim, typename Number, typename Number2>
529  struct EvaluatorTensorProduct<evaluate_general, dim, 0, 0, Number, Number2>
530  {
531  static constexpr unsigned int n_rows_of_product =
533  static constexpr unsigned int n_columns_of_product =
535 
541  : shape_values(nullptr)
542  , shape_gradients(nullptr)
543  , shape_hessians(nullptr)
544  , n_rows(numbers::invalid_unsigned_int)
545  , n_columns(numbers::invalid_unsigned_int)
546  {}
547 
552  const AlignedVector<Number2> &shape_gradients,
553  const AlignedVector<Number2> &shape_hessians,
554  const unsigned int n_rows,
555  const unsigned int n_columns)
556  : shape_values(shape_values.begin())
557  , shape_gradients(shape_gradients.begin())
558  , shape_hessians(shape_hessians.begin())
559  , n_rows(n_rows)
560  , n_columns(n_columns)
561  {
562  // We can enter this function either for the apply() path that has
563  // n_rows * n_columns entries or for the apply_face() path that only has
564  // n_rows * 3 entries in the array. Since we cannot decide about the use
565  // we must allow for both here.
566  Assert(shape_values.size() == 0 ||
567  shape_values.size() == n_rows * n_columns ||
568  shape_values.size() == n_rows * 3,
569  ExcDimensionMismatch(shape_values.size(), n_rows * n_columns));
570  Assert(shape_gradients.size() == 0 ||
571  shape_gradients.size() == n_rows * n_columns,
572  ExcDimensionMismatch(shape_gradients.size(), n_rows * n_columns));
573  Assert(shape_hessians.size() == 0 ||
574  shape_hessians.size() == n_rows * n_columns,
575  ExcDimensionMismatch(shape_hessians.size(), n_rows * n_columns));
576  }
577 
578  template <int direction, bool contract_over_rows, bool add>
579  void
580  values(const Number *in, Number *out) const
581  {
582  apply<direction, contract_over_rows, add>(shape_values, in, out);
583  }
584 
585  template <int direction, bool contract_over_rows, bool add>
586  void
587  gradients(const Number *in, Number *out) const
588  {
589  apply<direction, contract_over_rows, add>(shape_gradients, in, out);
590  }
591 
592  template <int direction, bool contract_over_rows, bool add>
593  void
594  hessians(const Number *in, Number *out) const
595  {
596  apply<direction, contract_over_rows, add>(shape_hessians, in, out);
597  }
598 
599  template <int direction, bool contract_over_rows, bool add>
600  void
601  values_one_line(const Number in[], Number out[]) const
602  {
603  Assert(shape_values != nullptr, ExcNotInitialized());
604  apply<direction, contract_over_rows, add, true>(shape_values, in, out);
605  }
606 
607  template <int direction, bool contract_over_rows, bool add>
608  void
609  gradients_one_line(const Number in[], Number out[]) const
610  {
611  Assert(shape_gradients != nullptr, ExcNotInitialized());
612  apply<direction, contract_over_rows, add, true>(shape_gradients, in, out);
613  }
614 
615  template <int direction, bool contract_over_rows, bool add>
616  void
617  hessians_one_line(const Number in[], Number out[]) const
618  {
619  Assert(shape_hessians != nullptr, ExcNotInitialized());
620  apply<direction, contract_over_rows, add, true>(shape_hessians, in, out);
621  }
622 
623  template <int direction,
624  bool contract_over_rows,
625  bool add,
626  bool one_line = false>
627  void
628  apply(const Number2 *DEAL_II_RESTRICT shape_data,
629  const Number * in,
630  Number * out) const;
631 
632  template <int face_direction,
633  bool contract_onto_face,
634  bool add,
635  int max_derivative>
636  void
637  apply_face(const Number *DEAL_II_RESTRICT in,
638  Number *DEAL_II_RESTRICT out) const;
639 
640  const Number2 * shape_values;
641  const Number2 * shape_gradients;
642  const Number2 * shape_hessians;
643  const unsigned int n_rows;
644  const unsigned int n_columns;
645  };
646 
647 
648 
649  template <int dim, typename Number, typename Number2>
650  template <int direction, bool contract_over_rows, bool add, bool one_line>
651  inline void
653  const Number2 *DEAL_II_RESTRICT shape_data,
654  const Number * in,
655  Number * out) const
656  {
657  static_assert(one_line == false || direction == dim - 1,
658  "Single-line evaluation only works for direction=dim-1.");
659  Assert(shape_data != nullptr,
660  ExcMessage(
661  "The given array shape_data must not be the null pointer!"));
662  Assert(dim == direction + 1 || one_line == true || n_rows == n_columns ||
663  in != out,
664  ExcMessage("In-place operation only supported for "
665  "n_rows==n_columns or single-line interpolation"));
666  AssertIndexRange(direction, dim);
667  const int mm = contract_over_rows ? n_rows : n_columns,
668  nn = contract_over_rows ? n_columns : n_rows;
669 
670  const int stride =
671  direction == 0 ? 1 : Utilities::fixed_power<direction>(n_columns);
672  const int n_blocks1 = one_line ? 1 : stride;
673  const int n_blocks2 = direction >= dim - 1 ?
674  1 :
675  Utilities::fixed_power<dim - direction - 1>(n_rows);
676  Assert(n_rows <= 128, ExcNotImplemented());
677 
678  for (int i2 = 0; i2 < n_blocks2; ++i2)
679  {
680  for (int i1 = 0; i1 < n_blocks1; ++i1)
681  {
682  Number x[129];
683  for (int i = 0; i < mm; ++i)
684  x[i] = in[stride * i];
685  for (int col = 0; col < nn; ++col)
686  {
687  Number2 val0;
688  if (contract_over_rows == true)
689  val0 = shape_data[col];
690  else
691  val0 = shape_data[col * n_columns];
692  Number res0 = val0 * x[0];
693  for (int i = 1; i < mm; ++i)
694  {
695  if (contract_over_rows == true)
696  val0 = shape_data[i * n_columns + col];
697  else
698  val0 = shape_data[col * n_columns + i];
699  res0 += val0 * x[i];
700  }
701  if (add == false)
702  out[stride * col] = res0;
703  else
704  out[stride * col] += res0;
705  }
706 
707  if (one_line == false)
708  {
709  ++in;
710  ++out;
711  }
712  }
713  if (one_line == false)
714  {
715  in += stride * (mm - 1);
716  out += stride * (nn - 1);
717  }
718  }
719  }
720 
721 
722 
723  template <int dim, typename Number, typename Number2>
724  template <int face_direction,
725  bool contract_onto_face,
726  bool add,
727  int max_derivative>
728  inline void
730  apply_face(const Number *DEAL_II_RESTRICT in,
731  Number *DEAL_II_RESTRICT out) const
732  {
733  Assert(shape_values != nullptr,
734  ExcMessage(
735  "The given array shape_data must not be the null pointer!"));
736  static_assert(dim > 0 && dim < 4, "Only dim=1,2,3 supported");
737  const int n_blocks1 = dim > 1 ? n_rows : 1;
738  const int n_blocks2 = dim > 2 ? n_rows : 1;
739 
740  AssertIndexRange(face_direction, dim);
741  const int stride =
742  face_direction > 0 ? Utilities::fixed_power<face_direction>(n_rows) : 1;
743  const int out_stride =
744  dim > 1 ? Utilities::fixed_power<dim - 1>(n_rows) : 1;
745 
746  for (int i2 = 0; i2 < n_blocks2; ++i2)
747  {
748  for (int i1 = 0; i1 < n_blocks1; ++i1)
749  {
750  if (contract_onto_face == true)
751  {
752  Number res0 = shape_values[0] * in[0];
753  Number res1, res2;
754  if (max_derivative > 0)
755  res1 = shape_values[n_rows] * in[0];
756  if (max_derivative > 1)
757  res2 = shape_values[2 * n_rows] * in[0];
758  for (unsigned int ind = 1; ind < n_rows; ++ind)
759  {
760  res0 += shape_values[ind] * in[stride * ind];
761  if (max_derivative > 0)
762  res1 += shape_values[ind + n_rows] * in[stride * ind];
763  if (max_derivative > 1)
764  res2 += shape_values[ind + 2 * n_rows] * in[stride * ind];
765  }
766  if (add == false)
767  {
768  out[0] = res0;
769  if (max_derivative > 0)
770  out[out_stride] = res1;
771  if (max_derivative > 1)
772  out[2 * out_stride] = res2;
773  }
774  else
775  {
776  out[0] += res0;
777  if (max_derivative > 0)
778  out[out_stride] += res1;
779  if (max_derivative > 1)
780  out[2 * out_stride] += res2;
781  }
782  }
783  else
784  {
785  for (unsigned int col = 0; col < n_rows; ++col)
786  {
787  if (add == false)
788  out[col * stride] = shape_values[col] * in[0];
789  else
790  out[col * stride] += shape_values[col] * in[0];
791  if (max_derivative > 0)
792  out[col * stride] +=
793  shape_values[col + n_rows] * in[out_stride];
794  if (max_derivative > 1)
795  out[col * stride] +=
796  shape_values[col + 2 * n_rows] * in[2 * out_stride];
797  }
798  }
799 
800  // increment: in regular case, just go to the next point in
801  // x-direction. If we are at the end of one chunk in x-dir, need
802  // to jump over to the next layer in z-direction
803  switch (face_direction)
804  {
805  case 0:
806  in += contract_onto_face ? n_rows : 1;
807  out += contract_onto_face ? 1 : n_rows;
808  break;
809  case 1:
810  ++in;
811  ++out;
812  // faces 2 and 3 in 3D use local coordinate system zx, which
813  // is the other way around compared to the tensor
814  // product. Need to take that into account.
815  if (dim == 3)
816  {
817  if (contract_onto_face)
818  out += n_rows - 1;
819  else
820  in += n_rows - 1;
821  }
822  break;
823  case 2:
824  ++in;
825  ++out;
826  break;
827  default:
828  Assert(false, ExcNotImplemented());
829  }
830  }
831  if (face_direction == 1 && dim == 3)
832  {
833  // adjust for local coordinate system zx
834  if (contract_onto_face)
835  {
836  in += n_rows * (n_rows - 1);
837  out -= n_rows * n_rows - 1;
838  }
839  else
840  {
841  out += n_rows * (n_rows - 1);
842  in -= n_rows * n_rows - 1;
843  }
844  }
845  }
846  }
847 
848 
849 
870  template <int dim,
871  int n_rows,
872  int n_columns,
873  typename Number,
874  typename Number2>
876  dim,
877  n_rows,
878  n_columns,
879  Number,
880  Number2>
881  {
882  static constexpr unsigned int n_rows_of_product =
883  Utilities::pow(n_rows, dim);
884  static constexpr unsigned int n_columns_of_product =
885  Utilities::pow(n_columns, dim);
886 
891  const AlignedVector<Number2> &shape_gradients,
892  const AlignedVector<Number2> &shape_hessians,
893  const unsigned int dummy1 = 0,
894  const unsigned int dummy2 = 0)
895  : shape_values(shape_values.begin())
896  , shape_gradients(shape_gradients.begin())
897  , shape_hessians(shape_hessians.begin())
898  {
899  Assert(shape_values.size() == 0 ||
900  shape_values.size() == n_rows * n_columns,
901  ExcDimensionMismatch(shape_values.size(), n_rows * n_columns));
902  Assert(shape_gradients.size() == 0 ||
903  shape_gradients.size() == n_rows * n_columns,
904  ExcDimensionMismatch(shape_gradients.size(), n_rows * n_columns));
905  Assert(shape_hessians.size() == 0 ||
906  shape_hessians.size() == n_rows * n_columns,
907  ExcDimensionMismatch(shape_hessians.size(), n_rows * n_columns));
908  (void)dummy1;
909  (void)dummy2;
910  }
911 
912  template <int direction, bool contract_over_rows, bool add>
913  void
914  values(const Number in[], Number out[]) const;
915 
916  template <int direction, bool contract_over_rows, bool add>
917  void
918  gradients(const Number in[], Number out[]) const;
919 
920  template <int direction, bool contract_over_rows, bool add>
921  void
922  hessians(const Number in[], Number out[]) const;
923 
924  const Number2 *shape_values;
925  const Number2 *shape_gradients;
926  const Number2 *shape_hessians;
927  };
928 
929 
930 
931  // In this case, the 1D shape values read (sorted lexicographically, rows
932  // run over 1D dofs, columns over quadrature points):
933  // Q2 --> [ 0.687 0 -0.087 ]
934  // [ 0.4 1 0.4 ]
935  // [-0.087 0 0.687 ]
936  // Q3 --> [ 0.66 0.003 0.002 0.049 ]
937  // [ 0.521 1.005 -0.01 -0.230 ]
938  // [-0.230 -0.01 1.005 0.521 ]
939  // [ 0.049 0.002 0.003 0.66 ]
940  // Q4 --> [ 0.658 0.022 0 -0.007 -0.032 ]
941  // [ 0.608 1.059 0 0.039 0.176 ]
942  // [-0.409 -0.113 1 -0.113 -0.409 ]
943  // [ 0.176 0.039 0 1.059 0.608 ]
944  // [-0.032 -0.007 0 0.022 0.658 ]
945  //
946  // In these matrices, we want to use avoid computations involving zeros and
947  // ones and in addition use the symmetry in entries to reduce the number of
948  // read operations.
949  template <int dim,
950  int n_rows,
951  int n_columns,
952  typename Number,
953  typename Number2>
954  template <int direction, bool contract_over_rows, bool add>
955  inline void
957  dim,
958  n_rows,
959  n_columns,
960  Number,
961  Number2>::values(const Number in[], Number out[]) const
962  {
963  Assert(shape_values != nullptr, ExcNotInitialized());
964  AssertIndexRange(direction, dim);
965  constexpr int mm = contract_over_rows ? n_rows : n_columns,
966  nn = contract_over_rows ? n_columns : n_rows;
967  constexpr int n_cols = nn / 2;
968  constexpr int mid = mm / 2;
969 
970  constexpr int stride = Utilities::pow(n_columns, direction);
971  constexpr int n_blocks1 = stride;
972  constexpr int n_blocks2 =
973  Utilities::pow(n_rows, (direction >= dim) ? 0 : (dim - direction - 1));
974 
975  for (int i2 = 0; i2 < n_blocks2; ++i2)
976  {
977  for (int i1 = 0; i1 < n_blocks1; ++i1)
978  {
979  for (int col = 0; col < n_cols; ++col)
980  {
981  Number2 val0, val1;
982  Number in0, in1, res0, res1;
983  if (contract_over_rows == true)
984  {
985  val0 = shape_values[col];
986  val1 = shape_values[nn - 1 - col];
987  }
988  else
989  {
990  val0 = shape_values[col * n_columns];
991  val1 = shape_values[(col + 1) * n_columns - 1];
992  }
993  if (mid > 0)
994  {
995  in0 = in[0];
996  in1 = in[stride * (mm - 1)];
997  res0 = val0 * in0;
998  res1 = val1 * in0;
999  res0 += val1 * in1;
1000  res1 += val0 * in1;
1001  for (int ind = 1; ind < mid; ++ind)
1002  {
1003  if (contract_over_rows == true)
1004  {
1005  val0 = shape_values[ind * n_columns + col];
1006  val1 = shape_values[ind * n_columns + nn - 1 - col];
1007  }
1008  else
1009  {
1010  val0 = shape_values[col * n_columns + ind];
1011  val1 =
1012  shape_values[(col + 1) * n_columns - 1 - ind];
1013  }
1014  in0 = in[stride * ind];
1015  in1 = in[stride * (mm - 1 - ind)];
1016  res0 += val0 * in0;
1017  res1 += val1 * in0;
1018  res0 += val1 * in1;
1019  res1 += val0 * in1;
1020  }
1021  }
1022  else
1023  res0 = res1 = Number();
1024  if (contract_over_rows == true)
1025  {
1026  if (mm % 2 == 1)
1027  {
1028  val0 = shape_values[mid * n_columns + col];
1029  in1 = val0 * in[stride * mid];
1030  res0 += in1;
1031  res1 += in1;
1032  }
1033  }
1034  else
1035  {
1036  if (mm % 2 == 1 && nn % 2 == 0)
1037  {
1038  val0 = shape_values[col * n_columns + mid];
1039  in1 = val0 * in[stride * mid];
1040  res0 += in1;
1041  res1 += in1;
1042  }
1043  }
1044  if (add == false)
1045  {
1046  out[stride * col] = res0;
1047  out[stride * (nn - 1 - col)] = res1;
1048  }
1049  else
1050  {
1051  out[stride * col] += res0;
1052  out[stride * (nn - 1 - col)] += res1;
1053  }
1054  }
1055  if (contract_over_rows == true && nn % 2 == 1 && mm % 2 == 1)
1056  {
1057  if (add == false)
1058  out[stride * n_cols] = in[stride * mid];
1059  else
1060  out[stride * n_cols] += in[stride * mid];
1061  }
1062  else if (contract_over_rows == true && nn % 2 == 1)
1063  {
1064  Number res0;
1065  Number2 val0 = shape_values[n_cols];
1066  if (mid > 0)
1067  {
1068  res0 = val0 * (in[0] + in[stride * (mm - 1)]);
1069  for (int ind = 1; ind < mid; ++ind)
1070  {
1071  val0 = shape_values[ind * n_columns + n_cols];
1072  res0 += val0 * (in[stride * ind] +
1073  in[stride * (mm - 1 - ind)]);
1074  }
1075  }
1076  else
1077  res0 = Number();
1078  if (add == false)
1079  out[stride * n_cols] = res0;
1080  else
1081  out[stride * n_cols] += res0;
1082  }
1083  else if (contract_over_rows == false && nn % 2 == 1)
1084  {
1085  Number res0;
1086  if (mid > 0)
1087  {
1088  Number2 val0 = shape_values[n_cols * n_columns];
1089  res0 = val0 * (in[0] + in[stride * (mm - 1)]);
1090  for (int ind = 1; ind < mid; ++ind)
1091  {
1092  val0 = shape_values[n_cols * n_columns + ind];
1093  Number in1 = val0 * (in[stride * ind] +
1094  in[stride * (mm - 1 - ind)]);
1095  res0 += in1;
1096  }
1097  if (mm % 2)
1098  res0 += in[stride * mid];
1099  }
1100  else
1101  res0 = in[0];
1102  if (add == false)
1103  out[stride * n_cols] = res0;
1104  else
1105  out[stride * n_cols] += res0;
1106  }
1107 
1108  ++in;
1109  ++out;
1110  }
1111  in += stride * (mm - 1);
1112  out += stride * (nn - 1);
1113  }
1114  }
1115 
1116 
1117 
1118  // For the specialized loop used for the gradient computation in
1119  // here, the 1D shape values read (sorted lexicographically, rows
1120  // run over 1D dofs, columns over quadrature points):
1121  // Q2 --> [-2.549 -1 0.549 ]
1122  // [ 3.098 0 -3.098 ]
1123  // [-0.549 1 2.549 ]
1124  // Q3 --> [-4.315 -1.03 0.5 -0.44 ]
1125  // [ 6.07 -1.44 -2.97 2.196 ]
1126  // [-2.196 2.97 1.44 -6.07 ]
1127  // [ 0.44 -0.5 1.03 4.315 ]
1128  // Q4 --> [-6.316 -1.3 0.333 -0.353 0.413 ]
1129  // [10.111 -2.76 -2.667 2.066 -2.306 ]
1130  // [-5.688 5.773 0 -5.773 5.688 ]
1131  // [ 2.306 -2.066 2.667 2.76 -10.111 ]
1132  // [-0.413 0.353 -0.333 -0.353 0.413 ]
1133  //
1134  // In these matrices, we want to use avoid computations involving
1135  // zeros and ones and in addition use the symmetry in entries to
1136  // reduce the number of read operations.
1137  template <int dim,
1138  int n_rows,
1139  int n_columns,
1140  typename Number,
1141  typename Number2>
1142  template <int direction, bool contract_over_rows, bool add>
1143  inline void
1145  dim,
1146  n_rows,
1147  n_columns,
1148  Number,
1149  Number2>::gradients(const Number in[],
1150  Number out[]) const
1151  {
1152  Assert(shape_gradients != nullptr, ExcNotInitialized());
1153  AssertIndexRange(direction, dim);
1154  constexpr int mm = contract_over_rows ? n_rows : n_columns,
1155  nn = contract_over_rows ? n_columns : n_rows;
1156  constexpr int n_cols = nn / 2;
1157  constexpr int mid = mm / 2;
1158 
1159  constexpr int stride = Utilities::pow(n_columns, direction);
1160  constexpr int n_blocks1 = stride;
1161  constexpr int n_blocks2 =
1162  Utilities::pow(n_rows, (direction >= dim) ? 0 : (dim - direction - 1));
1163 
1164  for (int i2 = 0; i2 < n_blocks2; ++i2)
1165  {
1166  for (int i1 = 0; i1 < n_blocks1; ++i1)
1167  {
1168  for (int col = 0; col < n_cols; ++col)
1169  {
1170  Number2 val0, val1;
1171  Number in0, in1, res0, res1;
1172  if (contract_over_rows == true)
1173  {
1174  val0 = shape_gradients[col];
1175  val1 = shape_gradients[nn - 1 - col];
1176  }
1177  else
1178  {
1179  val0 = shape_gradients[col * n_columns];
1180  val1 = shape_gradients[(nn - col - 1) * n_columns];
1181  }
1182  if (mid > 0)
1183  {
1184  in0 = in[0];
1185  in1 = in[stride * (mm - 1)];
1186  res0 = val0 * in0;
1187  res1 = val1 * in0;
1188  res0 -= val1 * in1;
1189  res1 -= val0 * in1;
1190  for (int ind = 1; ind < mid; ++ind)
1191  {
1192  if (contract_over_rows == true)
1193  {
1194  val0 = shape_gradients[ind * n_columns + col];
1195  val1 =
1196  shape_gradients[ind * n_columns + nn - 1 - col];
1197  }
1198  else
1199  {
1200  val0 = shape_gradients[col * n_columns + ind];
1201  val1 =
1202  shape_gradients[(nn - col - 1) * n_columns + ind];
1203  }
1204  in0 = in[stride * ind];
1205  in1 = in[stride * (mm - 1 - ind)];
1206  res0 += val0 * in0;
1207  res1 += val1 * in0;
1208  res0 -= val1 * in1;
1209  res1 -= val0 * in1;
1210  }
1211  }
1212  else
1213  res0 = res1 = Number();
1214  if (mm % 2 == 1)
1215  {
1216  if (contract_over_rows == true)
1217  val0 = shape_gradients[mid * n_columns + col];
1218  else
1219  val0 = shape_gradients[col * n_columns + mid];
1220  in1 = val0 * in[stride * mid];
1221  res0 += in1;
1222  res1 -= in1;
1223  }
1224  if (add == false)
1225  {
1226  out[stride * col] = res0;
1227  out[stride * (nn - 1 - col)] = res1;
1228  }
1229  else
1230  {
1231  out[stride * col] += res0;
1232  out[stride * (nn - 1 - col)] += res1;
1233  }
1234  }
1235  if (nn % 2 == 1)
1236  {
1237  Number2 val0;
1238  Number res0;
1239  if (contract_over_rows == true)
1240  val0 = shape_gradients[n_cols];
1241  else
1242  val0 = shape_gradients[n_cols * n_columns];
1243  res0 = val0 * (in[0] - in[stride * (mm - 1)]);
1244  for (int ind = 1; ind < mid; ++ind)
1245  {
1246  if (contract_over_rows == true)
1247  val0 = shape_gradients[ind * n_columns + n_cols];
1248  else
1249  val0 = shape_gradients[n_cols * n_columns + ind];
1250  Number in1 =
1251  val0 * (in[stride * ind] - in[stride * (mm - 1 - ind)]);
1252  res0 += in1;
1253  }
1254  if (add == false)
1255  out[stride * n_cols] = res0;
1256  else
1257  out[stride * n_cols] += res0;
1258  }
1259 
1260  ++in;
1261  ++out;
1262  }
1263  in += stride * (mm - 1);
1264  out += stride * (nn - 1);
1265  }
1266  }
1267 
1268 
1269 
1270  // evaluates the given shape data in 1d-3d using the tensor product
1271  // form assuming the symmetries of unit cell shape hessians for
1272  // finite elements in FEEvaluation
1273  template <int dim,
1274  int n_rows,
1275  int n_columns,
1276  typename Number,
1277  typename Number2>
1278  template <int direction, bool contract_over_rows, bool add>
1279  inline void
1281  dim,
1282  n_rows,
1283  n_columns,
1284  Number,
1285  Number2>::hessians(const Number in[],
1286  Number out[]) const
1287  {
1288  Assert(shape_hessians != nullptr, ExcNotInitialized());
1289  AssertIndexRange(direction, dim);
1290  constexpr int mm = contract_over_rows ? n_rows : n_columns;
1291  constexpr int nn = contract_over_rows ? n_columns : n_rows;
1292  constexpr int n_cols = nn / 2;
1293  constexpr int mid = mm / 2;
1294 
1295  constexpr int stride = Utilities::pow(n_columns, direction);
1296  constexpr int n_blocks1 = stride;
1297  constexpr int n_blocks2 =
1298  Utilities::pow(n_rows, (direction >= dim) ? 0 : (dim - direction - 1));
1299 
1300  for (int i2 = 0; i2 < n_blocks2; ++i2)
1301  {
1302  for (int i1 = 0; i1 < n_blocks1; ++i1)
1303  {
1304  for (int col = 0; col < n_cols; ++col)
1305  {
1306  Number2 val0, val1;
1307  Number in0, in1, res0, res1;
1308  if (contract_over_rows == true)
1309  {
1310  val0 = shape_hessians[col];
1311  val1 = shape_hessians[nn - 1 - col];
1312  }
1313  else
1314  {
1315  val0 = shape_hessians[col * n_columns];
1316  val1 = shape_hessians[(col + 1) * n_columns - 1];
1317  }
1318  if (mid > 0)
1319  {
1320  in0 = in[0];
1321  in1 = in[stride * (mm - 1)];
1322  res0 = val0 * in0;
1323  res1 = val1 * in0;
1324  res0 += val1 * in1;
1325  res1 += val0 * in1;
1326  for (int ind = 1; ind < mid; ++ind)
1327  {
1328  if (contract_over_rows == true)
1329  {
1330  val0 = shape_hessians[ind * n_columns + col];
1331  val1 =
1332  shape_hessians[ind * n_columns + nn - 1 - col];
1333  }
1334  else
1335  {
1336  val0 = shape_hessians[col * n_columns + ind];
1337  val1 =
1338  shape_hessians[(col + 1) * n_columns - 1 - ind];
1339  }
1340  in0 = in[stride * ind];
1341  in1 = in[stride * (mm - 1 - ind)];
1342  res0 += val0 * in0;
1343  res1 += val1 * in0;
1344  res0 += val1 * in1;
1345  res1 += val0 * in1;
1346  }
1347  }
1348  else
1349  res0 = res1 = Number();
1350  if (mm % 2 == 1)
1351  {
1352  if (contract_over_rows == true)
1353  val0 = shape_hessians[mid * n_columns + col];
1354  else
1355  val0 = shape_hessians[col * n_columns + mid];
1356  in1 = val0 * in[stride * mid];
1357  res0 += in1;
1358  res1 += in1;
1359  }
1360  if (add == false)
1361  {
1362  out[stride * col] = res0;
1363  out[stride * (nn - 1 - col)] = res1;
1364  }
1365  else
1366  {
1367  out[stride * col] += res0;
1368  out[stride * (nn - 1 - col)] += res1;
1369  }
1370  }
1371  if (nn % 2 == 1)
1372  {
1373  Number2 val0;
1374  Number res0;
1375  if (contract_over_rows == true)
1376  val0 = shape_hessians[n_cols];
1377  else
1378  val0 = shape_hessians[n_cols * n_columns];
1379  if (mid > 0)
1380  {
1381  res0 = val0 * (in[0] + in[stride * (mm - 1)]);
1382  for (int ind = 1; ind < mid; ++ind)
1383  {
1384  if (contract_over_rows == true)
1385  val0 = shape_hessians[ind * n_columns + n_cols];
1386  else
1387  val0 = shape_hessians[n_cols * n_columns + ind];
1388  Number in1 = val0 * (in[stride * ind] +
1389  in[stride * (mm - 1 - ind)]);
1390  res0 += in1;
1391  }
1392  }
1393  else
1394  res0 = Number();
1395  if (mm % 2 == 1)
1396  {
1397  if (contract_over_rows == true)
1398  val0 = shape_hessians[mid * n_columns + n_cols];
1399  else
1400  val0 = shape_hessians[n_cols * n_columns + mid];
1401  res0 += val0 * in[stride * mid];
1402  }
1403  if (add == false)
1404  out[stride * n_cols] = res0;
1405  else
1406  out[stride * n_cols] += res0;
1407  }
1408 
1409  ++in;
1410  ++out;
1411  }
1412  in += stride * (mm - 1);
1413  out += stride * (nn - 1);
1414  }
1415  }
1416 
1417 
1418 
1450  template <int dim,
1451  int n_rows,
1452  int n_columns,
1453  typename Number,
1454  typename Number2>
1456  dim,
1457  n_rows,
1458  n_columns,
1459  Number,
1460  Number2>
1461  {
1462  static constexpr unsigned int n_rows_of_product =
1463  Utilities::pow(n_rows, dim);
1464  static constexpr unsigned int n_columns_of_product =
1465  Utilities::pow(n_columns, dim);
1466 
1473  : shape_values(nullptr)
1474  , shape_gradients(nullptr)
1475  , shape_hessians(nullptr)
1476  {}
1477 
1483  : shape_values(shape_values.begin())
1484  , shape_gradients(nullptr)
1485  , shape_hessians(nullptr)
1486  {
1487  AssertDimension(shape_values.size(), n_rows * ((n_columns + 1) / 2));
1488  }
1489 
1495  const AlignedVector<Number2> &shape_gradients,
1496  const AlignedVector<Number2> &shape_hessians,
1497  const unsigned int dummy1 = 0,
1498  const unsigned int dummy2 = 0)
1499  : shape_values(shape_values.begin())
1500  , shape_gradients(shape_gradients.begin())
1501  , shape_hessians(shape_hessians.begin())
1502  {
1503  // In this function, we allow for dummy pointers if some of values,
1504  // gradients or hessians should not be computed
1505  if (!shape_values.empty())
1506  AssertDimension(shape_values.size(), n_rows * ((n_columns + 1) / 2));
1507  if (!shape_gradients.empty())
1508  AssertDimension(shape_gradients.size(), n_rows * ((n_columns + 1) / 2));
1509  if (!shape_hessians.empty())
1510  AssertDimension(shape_hessians.size(), n_rows * ((n_columns + 1) / 2));
1511  (void)dummy1;
1512  (void)dummy2;
1513  }
1514 
1515  template <int direction, bool contract_over_rows, bool add>
1516  void
1517  values(const Number in[], Number out[]) const
1518  {
1519  Assert(shape_values != nullptr, ExcNotInitialized());
1520  apply<direction, contract_over_rows, add, 0>(shape_values, in, out);
1521  }
1522 
1523  template <int direction, bool contract_over_rows, bool add>
1524  void
1525  gradients(const Number in[], Number out[]) const
1526  {
1527  Assert(shape_gradients != nullptr, ExcNotInitialized());
1528  apply<direction, contract_over_rows, add, 1>(shape_gradients, in, out);
1529  }
1530 
1531  template <int direction, bool contract_over_rows, bool add>
1532  void
1533  hessians(const Number in[], Number out[]) const
1534  {
1535  Assert(shape_hessians != nullptr, ExcNotInitialized());
1536  apply<direction, contract_over_rows, add, 2>(shape_hessians, in, out);
1537  }
1538 
1539  template <int direction, bool contract_over_rows, bool add>
1540  void
1541  values_one_line(const Number in[], Number out[]) const
1542  {
1543  Assert(shape_values != nullptr, ExcNotInitialized());
1544  apply<direction, contract_over_rows, add, 0, true>(shape_values, in, out);
1545  }
1546 
1547  template <int direction, bool contract_over_rows, bool add>
1548  void
1549  gradients_one_line(const Number in[], Number out[]) const
1550  {
1551  Assert(shape_gradients != nullptr, ExcNotInitialized());
1552  apply<direction, contract_over_rows, add, 1, true>(shape_gradients,
1553  in,
1554  out);
1555  }
1556 
1557  template <int direction, bool contract_over_rows, bool add>
1558  void
1559  hessians_one_line(const Number in[], Number out[]) const
1560  {
1561  Assert(shape_hessians != nullptr, ExcNotInitialized());
1562  apply<direction, contract_over_rows, add, 2, true>(shape_hessians,
1563  in,
1564  out);
1565  }
1566 
1595  template <int direction,
1596  bool contract_over_rows,
1597  bool add,
1598  int type,
1599  bool one_line = false>
1600  static void
1601  apply(const Number2 *DEAL_II_RESTRICT shape_data,
1602  const Number * in,
1603  Number * out);
1604 
1605  const Number2 *shape_values;
1606  const Number2 *shape_gradients;
1607  const Number2 *shape_hessians;
1608  };
1609 
1610 
1611 
1612  template <int dim,
1613  int n_rows,
1614  int n_columns,
1615  typename Number,
1616  typename Number2>
1617  template <int direction,
1618  bool contract_over_rows,
1619  bool add,
1620  int type,
1621  bool one_line>
1622  inline void
1624  dim,
1625  n_rows,
1626  n_columns,
1627  Number,
1628  Number2>::apply(const Number2 *DEAL_II_RESTRICT shapes,
1629  const Number * in,
1630  Number * out)
1631  {
1632  static_assert(type < 3, "Only three variants type=0,1,2 implemented");
1633  static_assert(one_line == false || direction == dim - 1,
1634  "Single-line evaluation only works for direction=dim-1.");
1635  Assert(dim == direction + 1 || one_line == true || n_rows == n_columns ||
1636  in != out,
1637  ExcMessage("In-place operation only supported for "
1638  "n_rows==n_columns or single-line interpolation"));
1639 
1640  // We cannot statically assert that direction is less than dim, so must do
1641  // an additional dynamic check
1642  AssertIndexRange(direction, dim);
1643 
1644  constexpr int nn = contract_over_rows ? n_columns : n_rows;
1645  constexpr int mm = contract_over_rows ? n_rows : n_columns;
1646  constexpr int n_cols = nn / 2;
1647  constexpr int mid = mm / 2;
1648 
1649  constexpr int stride = Utilities::pow(n_columns, direction);
1650  constexpr int n_blocks1 = one_line ? 1 : stride;
1651  constexpr int n_blocks2 =
1652  Utilities::pow(n_rows, (direction >= dim) ? 0 : (dim - direction - 1));
1653 
1654  constexpr int offset = (n_columns + 1) / 2;
1655 
1656  // this code may look very inefficient at first sight due to the many
1657  // different cases with if's at the innermost loop part, but all of the
1658  // conditionals can be evaluated at compile time because they are
1659  // templates, so the compiler should optimize everything away
1660  for (int i2 = 0; i2 < n_blocks2; ++i2)
1661  {
1662  for (int i1 = 0; i1 < n_blocks1; ++i1)
1663  {
1664  Number xp[mid > 0 ? mid : 1], xm[mid > 0 ? mid : 1];
1665  for (int i = 0; i < mid; ++i)
1666  {
1667  if (contract_over_rows == true && type == 1)
1668  {
1669  xp[i] = in[stride * i] - in[stride * (mm - 1 - i)];
1670  xm[i] = in[stride * i] + in[stride * (mm - 1 - i)];
1671  }
1672  else
1673  {
1674  xp[i] = in[stride * i] + in[stride * (mm - 1 - i)];
1675  xm[i] = in[stride * i] - in[stride * (mm - 1 - i)];
1676  }
1677  }
1678  Number xmid = in[stride * mid];
1679  for (int col = 0; col < n_cols; ++col)
1680  {
1681  Number r0, r1;
1682  if (mid > 0)
1683  {
1684  if (contract_over_rows == true)
1685  {
1686  r0 = shapes[col] * xp[0];
1687  r1 = shapes[(n_rows - 1) * offset + col] * xm[0];
1688  }
1689  else
1690  {
1691  r0 = shapes[col * offset] * xp[0];
1692  r1 = shapes[(n_rows - 1 - col) * offset] * xm[0];
1693  }
1694  for (int ind = 1; ind < mid; ++ind)
1695  {
1696  if (contract_over_rows == true)
1697  {
1698  r0 += shapes[ind * offset + col] * xp[ind];
1699  r1 += shapes[(n_rows - 1 - ind) * offset + col] *
1700  xm[ind];
1701  }
1702  else
1703  {
1704  r0 += shapes[col * offset + ind] * xp[ind];
1705  r1 += shapes[(n_rows - 1 - col) * offset + ind] *
1706  xm[ind];
1707  }
1708  }
1709  }
1710  else
1711  r0 = r1 = Number();
1712  if (mm % 2 == 1 && contract_over_rows == true)
1713  {
1714  if (type == 1)
1715  r1 += shapes[mid * offset + col] * xmid;
1716  else
1717  r0 += shapes[mid * offset + col] * xmid;
1718  }
1719  else if (mm % 2 == 1 && (nn % 2 == 0 || type > 0))
1720  r0 += shapes[col * offset + mid] * xmid;
1721 
1722  if (add == false)
1723  {
1724  out[stride * col] = r0 + r1;
1725  if (type == 1 && contract_over_rows == false)
1726  out[stride * (nn - 1 - col)] = r1 - r0;
1727  else
1728  out[stride * (nn - 1 - col)] = r0 - r1;
1729  }
1730  else
1731  {
1732  out[stride * col] += r0 + r1;
1733  if (type == 1 && contract_over_rows == false)
1734  out[stride * (nn - 1 - col)] += r1 - r0;
1735  else
1736  out[stride * (nn - 1 - col)] += r0 - r1;
1737  }
1738  }
1739  if (type == 0 && contract_over_rows == true && nn % 2 == 1 &&
1740  mm % 2 == 1)
1741  {
1742  if (add == false)
1743  out[stride * n_cols] = shapes[mid * offset + n_cols] * xmid;
1744  else
1745  out[stride * n_cols] += shapes[mid * offset + n_cols] * xmid;
1746  }
1747  else if (contract_over_rows == true && nn % 2 == 1)
1748  {
1749  Number r0;
1750  if (mid > 0)
1751  {
1752  r0 = shapes[n_cols] * xp[0];
1753  for (int ind = 1; ind < mid; ++ind)
1754  r0 += shapes[ind * offset + n_cols] * xp[ind];
1755  }
1756  else
1757  r0 = Number();
1758  if (type != 1 && mm % 2 == 1)
1759  r0 += shapes[mid * offset + n_cols] * xmid;
1760 
1761  if (add == false)
1762  out[stride * n_cols] = r0;
1763  else
1764  out[stride * n_cols] += r0;
1765  }
1766  else if (contract_over_rows == false && nn % 2 == 1)
1767  {
1768  Number r0;
1769  if (mid > 0)
1770  {
1771  if (type == 1)
1772  {
1773  r0 = shapes[n_cols * offset] * xm[0];
1774  for (int ind = 1; ind < mid; ++ind)
1775  r0 += shapes[n_cols * offset + ind] * xm[ind];
1776  }
1777  else
1778  {
1779  r0 = shapes[n_cols * offset] * xp[0];
1780  for (int ind = 1; ind < mid; ++ind)
1781  r0 += shapes[n_cols * offset + ind] * xp[ind];
1782  }
1783  }
1784  else
1785  r0 = Number();
1786 
1787  if ((type == 0 || type == 2) && mm % 2 == 1)
1788  r0 += shapes[n_cols * offset + mid] * xmid;
1789 
1790  if (add == false)
1791  out[stride * n_cols] = r0;
1792  else
1793  out[stride * n_cols] += r0;
1794  }
1795  if (one_line == false)
1796  {
1797  in += 1;
1798  out += 1;
1799  }
1800  }
1801  if (one_line == false)
1802  {
1803  in += stride * (mm - 1);
1804  out += stride * (nn - 1);
1805  }
1806  }
1807  }
1808 
1809 
1810 
1839  template <int dim,
1840  int n_rows,
1841  int n_columns,
1842  typename Number,
1843  typename Number2>
1845  dim,
1846  n_rows,
1847  n_columns,
1848  Number,
1849  Number2>
1850  {
1851  static constexpr unsigned int n_rows_of_product =
1852  Utilities::pow(n_rows, dim);
1853  static constexpr unsigned int n_columns_of_product =
1854  Utilities::pow(n_columns, dim);
1855 
1862  : shape_values(nullptr)
1863  , shape_gradients(nullptr)
1864  , shape_hessians(nullptr)
1865  {}
1866 
1872  : shape_values(shape_values.begin())
1873  , shape_gradients(nullptr)
1874  , shape_hessians(nullptr)
1875  {}
1876 
1882  const AlignedVector<Number2> &shape_gradients,
1883  const AlignedVector<Number2> &shape_hessians,
1884  const unsigned int dummy1 = 0,
1885  const unsigned int dummy2 = 0)
1886  : shape_values(shape_values.begin())
1887  , shape_gradients(shape_gradients.begin())
1888  , shape_hessians(shape_hessians.begin())
1889  {
1890  (void)dummy1;
1891  (void)dummy2;
1892  }
1893 
1894  template <int direction, bool contract_over_rows, bool add>
1895  void
1896  values(const Number in[], Number out[]) const
1897  {
1898  Assert(shape_values != nullptr, ExcNotInitialized());
1899  apply<direction, contract_over_rows, add, 0>(shape_values, in, out);
1900  }
1901 
1902  template <int direction, bool contract_over_rows, bool add>
1903  void
1904  gradients(const Number in[], Number out[]) const
1905  {
1906  Assert(shape_gradients != nullptr, ExcNotInitialized());
1907  apply<direction, contract_over_rows, add, 1>(shape_gradients, in, out);
1908  }
1909 
1910  template <int direction, bool contract_over_rows, bool add>
1911  void
1912  hessians(const Number in[], Number out[]) const
1913  {
1914  Assert(shape_hessians != nullptr, ExcNotInitialized());
1915  apply<direction, contract_over_rows, add, 0>(shape_hessians, in, out);
1916  }
1917 
1918  template <int direction, bool contract_over_rows, bool add>
1919  void
1920  values_one_line(const Number in[], Number out[]) const
1921  {
1922  Assert(shape_values != nullptr, ExcNotInitialized());
1923  apply<direction, contract_over_rows, add, 0, true>(shape_values, in, out);
1924  }
1925 
1926  template <int direction, bool contract_over_rows, bool add>
1927  void
1928  gradients_one_line(const Number in[], Number out[]) const
1929  {
1930  Assert(shape_gradients != nullptr, ExcNotInitialized());
1931  apply<direction, contract_over_rows, add, 1, true>(shape_gradients,
1932  in,
1933  out);
1934  }
1935 
1936  template <int direction, bool contract_over_rows, bool add>
1937  void
1938  hessians_one_line(const Number in[], Number out[]) const
1939  {
1940  Assert(shape_hessians != nullptr, ExcNotInitialized());
1941  apply<direction, contract_over_rows, add, 0, true>(shape_hessians,
1942  in,
1943  out);
1944  }
1945 
1973  template <int direction,
1974  bool contract_over_rows,
1975  bool add,
1976  int type,
1977  bool one_line = false>
1978  static void
1979  apply(const Number2 *DEAL_II_RESTRICT shape_data,
1980  const Number * in,
1981  Number * out);
1982 
1983  const Number2 *shape_values;
1984  const Number2 *shape_gradients;
1985  const Number2 *shape_hessians;
1986  };
1987 
1988 
1989 
1990  template <int dim,
1991  int n_rows,
1992  int n_columns,
1993  typename Number,
1994  typename Number2>
1995  template <int direction,
1996  bool contract_over_rows,
1997  bool add,
1998  int type,
1999  bool one_line>
2000  inline void
2002  dim,
2003  n_rows,
2004  n_columns,
2005  Number,
2006  Number2>::apply(const Number2 *DEAL_II_RESTRICT shapes,
2007  const Number * in,
2008  Number * out)
2009  {
2010  static_assert(one_line == false || direction == dim - 1,
2011  "Single-line evaluation only works for direction=dim-1.");
2012  static_assert(
2013  type == 0 || type == 1,
2014  "Only types 0 and 1 implemented for evaluate_symmetric_hierarchical.");
2015  Assert(dim == direction + 1 || one_line == true || n_rows == n_columns ||
2016  in != out,
2017  ExcMessage("In-place operation only supported for "
2018  "n_rows==n_columns or single-line interpolation"));
2019 
2020  // We cannot statically assert that direction is less than dim, so must do
2021  // an additional dynamic check
2022  AssertIndexRange(direction, dim);
2023 
2024  constexpr int nn = contract_over_rows ? n_columns : n_rows;
2025  constexpr int mm = contract_over_rows ? n_rows : n_columns;
2026  constexpr int n_cols = nn / 2;
2027  constexpr int mid = mm / 2;
2028 
2029  constexpr int stride = Utilities::pow(n_columns, direction);
2030  constexpr int n_blocks1 = one_line ? 1 : stride;
2031  constexpr int n_blocks2 =
2032  Utilities::pow(n_rows, (direction >= dim) ? 0 : (dim - direction - 1));
2033 
2034  // this code may look very inefficient at first sight due to the many
2035  // different cases with if's at the innermost loop part, but all of the
2036  // conditionals can be evaluated at compile time because they are
2037  // templates, so the compiler should optimize everything away
2038  for (int i2 = 0; i2 < n_blocks2; ++i2)
2039  {
2040  for (int i1 = 0; i1 < n_blocks1; ++i1)
2041  {
2042  if (contract_over_rows)
2043  {
2044  Number x[mm];
2045  for (unsigned int i = 0; i < mm; ++i)
2046  x[i] = in[stride * i];
2047  for (unsigned int col = 0; col < n_cols; ++col)
2048  {
2049  Number r0, r1;
2050  if (mid > 0)
2051  {
2052  r0 = shapes[col] * x[0];
2053  r1 = shapes[col + n_columns] * x[1];
2054  for (unsigned int ind = 1; ind < mid; ++ind)
2055  {
2056  r0 +=
2057  shapes[col + 2 * ind * n_columns] * x[2 * ind];
2058  r1 += shapes[col + (2 * ind + 1) * n_columns] *
2059  x[2 * ind + 1];
2060  }
2061  }
2062  else
2063  r0 = r1 = Number();
2064  if (mm % 2 == 1)
2065  r0 += shapes[col + (mm - 1) * n_columns] * x[mm - 1];
2066  if (add == false)
2067  {
2068  out[stride * col] = r0 + r1;
2069  if (type == 1)
2070  out[stride * (nn - 1 - col)] = r1 - r0;
2071  else
2072  out[stride * (nn - 1 - col)] = r0 - r1;
2073  }
2074  else
2075  {
2076  out[stride * col] += r0 + r1;
2077  if (type == 1)
2078  out[stride * (nn - 1 - col)] += r1 - r0;
2079  else
2080  out[stride * (nn - 1 - col)] += r0 - r1;
2081  }
2082  }
2083  if (nn % 2 == 1)
2084  {
2085  Number r0;
2086  const unsigned int shift = type == 1 ? 1 : 0;
2087  if (mid > 0)
2088  {
2089  r0 = shapes[n_cols + shift * n_columns] * x[shift];
2090  for (unsigned int ind = 1; ind < mid; ++ind)
2091  r0 += shapes[n_cols + (2 * ind + shift) * n_columns] *
2092  x[2 * ind + shift];
2093  }
2094  else
2095  r0 = 0;
2096  if (type != 1 && mm % 2 == 1)
2097  r0 += shapes[n_cols + (mm - 1) * n_columns] * x[mm - 1];
2098  if (add == false)
2099  out[stride * n_cols] = r0;
2100  else
2101  out[stride * n_cols] += r0;
2102  }
2103  }
2104  else
2105  {
2106  Number xp[mid + 1], xm[mid > 0 ? mid : 1];
2107  for (int i = 0; i < mid; ++i)
2108  if (type == 0)
2109  {
2110  xp[i] = in[stride * i] + in[stride * (mm - 1 - i)];
2111  xm[i] = in[stride * i] - in[stride * (mm - 1 - i)];
2112  }
2113  else
2114  {
2115  xp[i] = in[stride * i] - in[stride * (mm - 1 - i)];
2116  xm[i] = in[stride * i] + in[stride * (mm - 1 - i)];
2117  }
2118  if (mm % 2 == 1)
2119  xp[mid] = in[stride * mid];
2120  for (unsigned int col = 0; col < n_cols; ++col)
2121  {
2122  Number r0, r1;
2123  if (mid > 0)
2124  {
2125  r0 = shapes[2 * col * n_columns] * xp[0];
2126  r1 = shapes[(2 * col + 1) * n_columns] * xm[0];
2127  for (unsigned int ind = 1; ind < mid; ++ind)
2128  {
2129  r0 += shapes[2 * col * n_columns + ind] * xp[ind];
2130  r1 +=
2131  shapes[(2 * col + 1) * n_columns + ind] * xm[ind];
2132  }
2133  }
2134  else
2135  r0 = r1 = Number();
2136  if (mm % 2 == 1)
2137  {
2138  if (type == 1)
2139  r1 +=
2140  shapes[(2 * col + 1) * n_columns + mid] * xp[mid];
2141  else
2142  r0 += shapes[2 * col * n_columns + mid] * xp[mid];
2143  }
2144  if (add == false)
2145  {
2146  out[stride * (2 * col)] = r0;
2147  out[stride * (2 * col + 1)] = r1;
2148  }
2149  else
2150  {
2151  out[stride * (2 * col)] += r0;
2152  out[stride * (2 * col + 1)] += r1;
2153  }
2154  }
2155  if (nn % 2 == 1)
2156  {
2157  Number r0;
2158  if (mid > 0)
2159  {
2160  r0 = shapes[(nn - 1) * n_columns] * xp[0];
2161  for (unsigned int ind = 1; ind < mid; ++ind)
2162  r0 += shapes[(nn - 1) * n_columns + ind] * xp[ind];
2163  }
2164  else
2165  r0 = Number();
2166  if (mm % 2 == 1 && type == 0)
2167  r0 += shapes[(nn - 1) * n_columns + mid] * xp[mid];
2168  if (add == false)
2169  out[stride * (nn - 1)] = r0;
2170  else
2171  out[stride * (nn - 1)] += r0;
2172  }
2173  }
2174  if (one_line == false)
2175  {
2176  in += 1;
2177  out += 1;
2178  }
2179  }
2180  if (one_line == false)
2181  {
2182  in += stride * (mm - 1);
2183  out += stride * (nn - 1);
2184  }
2185  }
2186  }
2187 
2188 } // end of namespace internal
2189 
2190 
2191 DEAL_II_NAMESPACE_CLOSE
2192 
2193 #endif
static const unsigned int invalid_unsigned_int
Definition: types.h:173
#define AssertDimension(dim1, dim2)
Definition: exceptions.h:1366
constexpr unsigned int pow(const unsigned int base, const unsigned int iexp)
Definition: utilities.h:353
EvaluatorTensorProduct(const AlignedVector< Number2 > &shape_values, const AlignedVector< Number2 > &shape_gradients, const AlignedVector< Number2 > &shape_hessians, const unsigned int dummy1=0, const unsigned int dummy2=0)
#define AssertIndexRange(index, range)
Definition: exceptions.h:1407
static::ExceptionBase & ExcNotInitialized()
size_type size() const
T fixed_power(const T t)
Definition: utilities.h:912
static::ExceptionBase & ExcMessage(std::string arg1)
EvaluatorTensorProduct(const AlignedVector< Number2 > &shape_values, const AlignedVector< Number2 > &shape_gradients, const AlignedVector< Number2 > &shape_hessians, const unsigned int dummy1=0, const unsigned int dummy2=0)
#define Assert(cond, exc)
Definition: exceptions.h:1227
static::ExceptionBase & ExcDimensionMismatch(std::size_t arg1, std::size_t arg2)
EvaluatorTensorProduct(const AlignedVector< Number2 > &shape_values, const AlignedVector< Number2 > &shape_gradients, const AlignedVector< Number2 > &shape_hessians, const unsigned int n_rows, const unsigned int n_columns)
EvaluatorTensorProduct(const AlignedVector< Number2 > &shape_values, const AlignedVector< Number2 > &shape_gradients, const AlignedVector< Number2 > &shape_hessians, const unsigned int dummy1=0, const unsigned int dummy2=0)
EvaluatorTensorProduct(const AlignedVector< Number2 > &shape_values, const AlignedVector< Number2 > &shape_gradients, const AlignedVector< Number2 > &shape_hessians, const unsigned int dummy1=0, const unsigned int dummy2=0)
static::ExceptionBase & ExcNotImplemented()
bool empty() const