doxygen/deal.II/tensor__product__kernels_8h_source.html

 // ---------------------------------------------------------------------
 //
 // Copyright (C) 2017 - 2018-2018 by the deal.II authors
 //
 // This file is part of the deal.II library.
 //
 // The deal.II library is free software; you can use it, redistribute
 // it, and/or modify it under the terms of the GNU Lesser General
 // Public License as published by the Free Software Foundation; either
 // version 2.1 of the License, or (at your option) any later version.
 // The full text of the license can be found in the file LICENSE.md at
 // the top level directory of deal.II.
 //
 // ---------------------------------------------------------------------


 #ifndef dealii_matrix_free_tensor_product_kernels_h
 #define dealii_matrix_free_tensor_product_kernels_h

 #include <deal.II/base/config.h>

 #include <deal.II/base/aligned_vector.h>
 #include <deal.II/base/utilities.h>


 DEAL_II_NAMESPACE_OPEN


 namespace internal
 {
   /**
    * Selects which partial specialization of EvaluatorTensorProduct is used,
    * i.e., which implementation strategy the 1D kernels follow.
    */
   enum EvaluatorVariant
   {
     /**
      * Selects the specializations that perform a plain 1D matrix
      * contraction per line without assuming any structure in the shape
      * data.
      */
     evaluate_general,
     /**
      * Selects the specialization whose kernels use the symmetry of the
      * entries in the 1D shape data to reduce the number of read operations.
      */
     evaluate_symmetric,
     /**
      * NOTE(review): no specialization for this variant is visible in this
      * part of the file; presumably it selects kernels based on an even-odd
      * decomposition of the shape data -- confirm against the
      * specialization.
      */
     evaluate_evenodd,
     /**
      * NOTE(review): no specialization for this variant is visible in this
      * part of the file; presumably it targets hierarchical bases with
      * alternating symmetric/anti-symmetric shape functions -- confirm
      * against the specialization.
      */
     evaluate_symmetric_hierarchical
   };


   /**
    * Class template providing 1D kernels that are applied along one
    * coordinate direction of a dim-dimensional tensor-product data array.
    *
    * The primary template is empty; the functionality is provided by the
    * partial specializations for the individual EvaluatorVariant values.
    *
    * @tparam variant   Implementation strategy, see EvaluatorVariant.
    * @tparam dim       Number of tensor-product directions.
    * @tparam n_rows    Number of rows of the 1D matrices. The pair
    *                   n_rows == n_columns == 0 selects a specialization
    *                   that takes both sizes at run time.
    * @tparam n_columns Number of columns of the 1D matrices.
    * @tparam Number    Type of the entries of the input and output arrays.
    * @tparam Number2   Type of the entries of the 1D shape data arrays;
    *                   defaults to Number.
    */
   template <EvaluatorVariant variant,
             int              dim,
             int              n_rows,
             int              n_columns,
             typename Number,
             typename Number2 = Number>
   struct EvaluatorTensorProduct
   {};


   /**
    * Tensor-product evaluator for the evaluate_general variant with matrix
    * dimensions known at compile time.
    *
    * The object stores raw pointers to three arrays of 1D shape data (values,
    * gradients and Hessians) and applies one of them along a single
    * coordinate direction of a dim-dimensional data array. No structure of
    * the matrices is exploited. In apply(), the matrices are addressed as
    * shape_data[row * n_columns + column].
    */
   template <int dim,
             int n_rows,
             int n_columns,
             typename Number,
             typename Number2>
   struct EvaluatorTensorProduct<evaluate_general,
                                 dim,
                                 n_rows,
                                 n_columns,
                                 Number,
                                 Number2>
   {
     /**
      * n_rows raised to the power dim.
      */
     static constexpr unsigned int n_rows_of_product =
       Utilities::pow(n_rows, dim);
     /**
      * n_columns raised to the power dim.
      */
     static constexpr unsigned int n_columns_of_product =
       Utilities::pow(n_columns, dim);

     /**
      * Default constructor. All three data pointers are set to nullptr, so
      * only the static apply() function with an explicitly given array can
      * be used on such an object.
      */
     EvaluatorTensorProduct()
       : shape_values(nullptr)
       , shape_gradients(nullptr)
       , shape_hessians(nullptr)
     {}

     /**
      * Constructor storing pointers to the first entry of each of the given
      * arrays. The data is not copied, so the arrays must outlive the use of
      * this object.
      *
      * @param shape_values    1D shape value data; may be empty.
      * @param shape_gradients 1D shape gradient data; may be empty.
      * @param shape_hessians  1D shape Hessian data; may be empty.
      * @param dummy1          Unused. It occupies the position where the
      *                        run-time size specialization takes n_rows.
      * @param dummy2          Unused. It occupies the position where the
      *                        run-time size specialization takes n_columns.
      */
     EvaluatorTensorProduct(const AlignedVector<Number2> &shape_values,
                            const AlignedVector<Number2> &shape_gradients,
                            const AlignedVector<Number2> &shape_hessians,
                            const unsigned int            dummy1 = 0,
                            const unsigned int            dummy2 = 0)
       : shape_values(shape_values.begin())
       , shape_gradients(shape_gradients.begin())
       , shape_hessians(shape_hessians.begin())
     {
       // We can enter this function either for the apply() path that has
       // n_rows * n_columns entries or for the apply_face() path that only has
       // n_rows * 3 entries in the array. Since we cannot decide about the use
       // we must allow for both here.
       Assert(shape_values.size() == 0 ||
                shape_values.size() == n_rows * n_columns ||
                shape_values.size() == 3 * n_rows,
              ExcDimensionMismatch(shape_values.size(), n_rows * n_columns));
       Assert(shape_gradients.size() == 0 ||
                shape_gradients.size() == n_rows * n_columns,
              ExcDimensionMismatch(shape_gradients.size(), n_rows * n_columns));
       Assert(shape_hessians.size() == 0 ||
                shape_hessians.size() == n_rows * n_columns,
              ExcDimensionMismatch(shape_hessians.size(), n_rows * n_columns));
       // Silence unused-parameter warnings; the arguments carry no
       // information in this specialization.
       (void)dummy1;
       (void)dummy2;
     }

     /**
      * Apply the stored shape value matrix along @p direction. See apply()
      * for the meaning of the template parameters.
      */
     template <int direction, bool contract_over_rows, bool add>
     void
     values(const Number in[], Number out[]) const
     {
       apply<direction, contract_over_rows, add>(shape_values, in, out);
     }

     /**
      * Apply the stored shape gradient matrix along @p direction. See
      * apply() for the meaning of the template parameters.
      */
     template <int direction, bool contract_over_rows, bool add>
     void
     gradients(const Number in[], Number out[]) const
     {
       apply<direction, contract_over_rows, add>(shape_gradients, in, out);
     }

     /**
      * Apply the stored shape Hessian matrix along @p direction. See apply()
      * for the meaning of the template parameters.
      */
     template <int direction, bool contract_over_rows, bool add>
     void
     hessians(const Number in[], Number out[]) const
     {
       apply<direction, contract_over_rows, add>(shape_hessians, in, out);
     }

     /**
      * Like values(), but only a single 1D line of data is processed
      * (apply() is called with one_line = true, which requires
      * direction == dim - 1).
      */
     template <int direction, bool contract_over_rows, bool add>
     void
     values_one_line(const Number in[], Number out[]) const
     {
       Assert(shape_values != nullptr, ExcNotInitialized());
       apply<direction, contract_over_rows, add, true>(shape_values, in, out);
     }

     /**
      * Like gradients(), but only a single 1D line of data is processed
      * (apply() is called with one_line = true, which requires
      * direction == dim - 1).
      */
     template <int direction, bool contract_over_rows, bool add>
     void
     gradients_one_line(const Number in[], Number out[]) const
     {
       Assert(shape_gradients != nullptr, ExcNotInitialized());
       apply<direction, contract_over_rows, add, true>(shape_gradients, in, out);
     }

     /**
      * Like hessians(), but only a single 1D line of data is processed
      * (apply() is called with one_line = true, which requires
      * direction == dim - 1).
      */
     template <int direction, bool contract_over_rows, bool add>
     void
     hessians_one_line(const Number in[], Number out[]) const
     {
       Assert(shape_hessians != nullptr, ExcNotInitialized());
       apply<direction, contract_over_rows, add, true>(shape_hessians, in, out);
     }

     /**
      * Kernel used by all functions above: contracts the 1D matrix
      * @p shape_data with the entries of @p in along one coordinate
      * direction and stores the result in @p out. The function is static and
      * hence can also be used with an array that is not stored in this
      * object.
      *
      * @tparam direction          Direction along which the matrix is
      *                            applied, 0 <= direction < dim.
      * @tparam contract_over_rows If true, the sum runs over the row index
      *                            of the matrix (n_rows input entries and
      *                            n_columns output entries per line); if
      *                            false, it runs over the column index.
      * @tparam add                If true, the result is added to the
      *                            content of @p out, otherwise @p out is
      *                            overwritten.
      * @tparam one_line           If true, only one 1D line is processed;
      *                            only allowed for direction == dim - 1.
      *
      * @param shape_data Matrix entries, must not be nullptr.
      * @param in         Input data.
      * @param out        Output data.
      */
     template <int  direction,
               bool contract_over_rows,
               bool add,
               bool one_line = false>
     static void
     apply(const Number2 *DEAL_II_RESTRICT shape_data,
           const Number *                  in,
           Number *                        out);

     /**
      * Kernel relating cell data and data on a face orthogonal to
      * @p face_direction, using the array shape_values, which on this path
      * is read as up to three consecutive blocks of n_rows entries
      * (presumably the value and the first and second derivative of the 1D
      * shape functions on the face).
      *
      * @tparam face_direction     Direction along which the 1D lines are
      *                            contracted or expanded.
      * @tparam contract_onto_face If true, each 1D line of @p in is reduced
      *                            to entries of @p out; if false, entries of
      *                            @p in are expanded into 1D lines of
      *                            @p out.
      * @tparam add                If true, results are added to @p out.
      * @tparam max_derivative     Highest block of shape_values that is
      *                            used, in the range 0 to 2.
      */
     template <int  face_direction,
               bool contract_onto_face,
               bool add,
               int  max_derivative>
     void
     apply_face(const Number *DEAL_II_RESTRICT in,
                Number *DEAL_II_RESTRICT out) const;

     // Non-owning pointers to the first entry of the arrays handed to the
     // constructor (nullptr after default construction).
     const Number2 *shape_values;
     const Number2 *shape_gradients;
     const Number2 *shape_hessians;
   };


   /**
    * Contract the 1D matrix @p shape_data with the data in @p in along the
    * coordinate direction @p direction and store (or, for add == true,
    * accumulate) the result in @p out.
    *
    * The matrix is read as shape_data[row * n_columns + column]. For
    * contract_over_rows == true the sum runs over the row index, otherwise
    * over the column index. With one_line == true a single 1D line starting
    * at @p in / @p out is processed and the pointers are never advanced.
    */
   template <int dim,
             int n_rows,
             int n_columns,
             typename Number,
             typename Number2>
   template <int direction, bool contract_over_rows, bool add, bool one_line>
   inline void
   EvaluatorTensorProduct<evaluate_general,
                          dim,
                          n_rows,
                          n_columns,
                          Number,
                          Number2>::apply(const Number2 *DEAL_II_RESTRICT
                                                         shape_data,
                                          const Number * in,
                                          Number *       out)
   {
     static_assert(one_line == false || direction == dim - 1,
                   "Single-line evaluation only works for direction=dim-1.");
     Assert(shape_data != nullptr,
            ExcMessage(
              "The given array shape_data must not be the null pointer!"));
     Assert(dim == direction + 1 || one_line == true || n_rows == n_columns ||
              in != out,
            ExcMessage("In-place operation only supported for "
                       "n_rows==n_columns or single-line interpolation"));
     AssertIndexRange(direction, dim);

     // Extent of the index that is summed over and of the resulting index.
     constexpr int n_contracted = contract_over_rows ? n_rows : n_columns;
     constexpr int n_results    = contract_over_rows ? n_columns : n_rows;

     // The matrix is stored with rows of length n_columns. start_step is the
     // distance between the first coefficients of two consecutive results,
     // entry_step the distance between consecutive coefficients of one sum.
     constexpr int start_step = contract_over_rows ? 1 : n_columns;
     constexpr int entry_step = contract_over_rows ? n_columns : 1;

     constexpr int stride  = Utilities::pow(n_columns, direction);
     constexpr int n_inner = one_line ? 1 : stride;
     constexpr int n_outer =
       Utilities::pow(n_rows, (direction >= dim) ? 0 : (dim - direction - 1));

     for (int outer = 0; outer < n_outer; ++outer)
       {
         for (int inner = 0; inner < n_inner; ++inner)
           {
             // Gather the complete input line before anything is written, so
             // that in and out may coincide in the cases admitted above.
             Number line[n_contracted];
             for (int k = 0; k < n_contracted; ++k)
               line[k] = in[stride * k];

             for (int j = 0; j < n_results; ++j)
               {
                 const Number2 *const coefficients =
                   shape_data + j * start_step;

                 Number2 coefficient = coefficients[0];
                 Number  sum         = coefficient * line[0];
                 for (int k = 1; k < n_contracted; ++k)
                   {
                     coefficient = coefficients[k * entry_step];
                     sum += coefficient * line[k];
                   }

                 Number &target = out[stride * j];
                 if (add)
                   target += sum;
                 else
                   target = sum;
               }

             // Move on to the neighboring line within the current block.
             if (!one_line)
               {
                 ++in;
                 ++out;
               }
           }

         // Skip the entries already covered by the strided accesses above to
         // reach the beginning of the next block.
         if (!one_line)
           {
             in += stride * (n_contracted - 1);
             out += stride * (n_results - 1);
           }
       }
   }


   /**
    * Relate data on a cell to data on a face orthogonal to
    * @p face_direction.
    *
    * The array shape_values is read as up to three consecutive blocks of
    * n_rows entries each (presumably the value and the first and second
    * derivative of the 1D shape functions evaluated on the face); blocks
    * beyond @p max_derivative are not accessed.
    *
    * For contract_onto_face == true, every 1D line of @p in in direction
    * @p face_direction is contracted with each block, and the results are
    * stored in @p out at distances of n_rows^(dim-1). For
    * contract_onto_face == false, the transposed operation is performed:
    * the entries of @p in at distances of n_rows^(dim-1) are multiplied by
    * the blocks and summed into 1D lines of @p out.
    *
    * @tparam face_direction     Direction of the 1D lines, in [0, dim).
    * @tparam contract_onto_face Direction of the operation, see above.
    * @tparam add                If true, results are added to @p out,
    *                            otherwise @p out is overwritten.
    * @tparam max_derivative     Index of the last block used, 0, 1 or 2.
    */
   template <int dim,
             int n_rows,
             int n_columns,
             typename Number,
             typename Number2>
   template <int  face_direction,
             bool contract_onto_face,
             bool add,
             int  max_derivative>
   inline void
   EvaluatorTensorProduct<evaluate_general,
                          dim,
                          n_rows,
                          n_columns,
                          Number,
                          Number2>::apply_face(const Number *DEAL_II_RESTRICT in,
                                               Number *DEAL_II_RESTRICT
                                                       out) const
   {
     static_assert(dim > 0 && dim < 4, "Only dim=1,2,3 supported");
     static_assert(max_derivative >= 0 && max_derivative < 3,
                   "Only derivative orders 0-2 implemented");
     Assert(shape_values != nullptr,
            ExcMessage(
              "The given array shape_values must not be the null pointer."));

     // Number of 1D lines in the two directions tangential to the face.
     constexpr int n_blocks1 = dim > 1 ? n_rows : 1;
     constexpr int n_blocks2 = dim > 2 ? n_rows : 1;

     AssertIndexRange(face_direction, dim);
     // Distance between neighboring entries of one 1D line in the cell array
     // and between the blocks of face data belonging to shape_values blocks.
     constexpr int stride     = Utilities::pow(n_rows, face_direction);
     constexpr int out_stride = Utilities::pow(n_rows, dim - 1);
     // Local restrict-qualified copy of the member pointer. The shape data
     // is stored with type Number2, which may differ from Number, so the
     // local pointer must use Number2 as well.
     const Number2 *DEAL_II_RESTRICT shape_values = this->shape_values;

     for (int i2 = 0; i2 < n_blocks2; ++i2)
       {
         for (int i1 = 0; i1 < n_blocks1; ++i1)
           {
             if (contract_onto_face == true)
               {
                 // res1 and res2 are only initialized and used if the
                 // respective derivative is requested via max_derivative
                 Number res0 = shape_values[0] * in[0];
                 Number res1, res2;
                 if (max_derivative > 0)
                   res1 = shape_values[n_rows] * in[0];
                 if (max_derivative > 1)
                   res2 = shape_values[2 * n_rows] * in[0];
                 for (int ind = 1; ind < n_rows; ++ind)
                   {
                     res0 += shape_values[ind] * in[stride * ind];
                     if (max_derivative > 0)
                       res1 += shape_values[ind + n_rows] * in[stride * ind];
                     if (max_derivative > 1)
                       res2 += shape_values[ind + 2 * n_rows] * in[stride * ind];
                   }
                 if (add == false)
                   {
                     out[0] = res0;
                     if (max_derivative > 0)
                       out[out_stride] = res1;
                     if (max_derivative > 1)
                       out[2 * out_stride] = res2;
                   }
                 else
                   {
                     out[0] += res0;
                     if (max_derivative > 0)
                       out[out_stride] += res1;
                     if (max_derivative > 1)
                       out[2 * out_stride] += res2;
                   }
               }
             else
               {
                 for (int col = 0; col < n_rows; ++col)
                   {
                     // the first block either overwrites or accumulates,
                     // the remaining blocks always add to that result
                     if (add == false)
                       out[col * stride] = shape_values[col] * in[0];
                     else
                       out[col * stride] += shape_values[col] * in[0];
                     if (max_derivative > 0)
                       out[col * stride] +=
                         shape_values[col + n_rows] * in[out_stride];
                     if (max_derivative > 1)
                       out[col * stride] +=
                         shape_values[col + 2 * n_rows] * in[2 * out_stride];
                   }
               }

             // increment: in regular case, just go to the next point in
             // x-direction. If we are at the end of one chunk in x-dir, need
             // to jump over to the next layer in z-direction
             switch (face_direction)
               {
                 case 0:
                   in += contract_onto_face ? n_rows : 1;
                   out += contract_onto_face ? 1 : n_rows;
                   break;
                 case 1:
                   ++in;
                   ++out;
                   // faces 2 and 3 in 3D use local coordinate system zx, which
                   // is the other way around compared to the tensor
                   // product. Need to take that into account.
                   if (dim == 3)
                     {
                       if (contract_onto_face)
                         out += n_rows - 1;
                       else
                         in += n_rows - 1;
                     }
                   break;
                 case 2:
                   ++in;
                   ++out;
                   break;
                 default:
                   Assert(false, ExcNotImplemented());
               }
           }
         if (face_direction == 1 && dim == 3)
           {
             // adjust for local coordinate system zx
             if (contract_onto_face)
               {
                 in += n_rows * (n_rows - 1);
                 out -= n_rows * n_rows - 1;
               }
             else
               {
                 out += n_rows * (n_rows - 1);
                 in -= n_rows * n_rows - 1;
               }
           }
       }
   }


   /**
    * Tensor-product evaluator for the evaluate_general variant where the
    * matrix dimensions are only known at run time. This specialization is
    * selected by the template arguments n_rows == n_columns == 0; the actual
    * sizes are passed to the constructor and stored as members.
    *
    * As in the compile-time variant, the object stores raw pointers to three
    * arrays of 1D shape data and applies one of them along a single
    * coordinate direction, addressing the matrices as
    * shape_data[row * n_columns + column].
    */
   template <int dim, typename Number, typename Number2>
   struct EvaluatorTensorProduct<evaluate_general, dim, 0, 0, Number, Number2>
   {
     /**
      * Not available at compile time in this specialization, hence set to
      * numbers::invalid_unsigned_int.
      */
     static constexpr unsigned int n_rows_of_product =
       numbers::invalid_unsigned_int;
     /**
      * Not available at compile time in this specialization, hence set to
      * numbers::invalid_unsigned_int.
      */
     static constexpr unsigned int n_columns_of_product =
       numbers::invalid_unsigned_int;

     /**
      * Default constructor. The data pointers are set to nullptr and both
      * sizes to numbers::invalid_unsigned_int.
      */
     EvaluatorTensorProduct()
       : shape_values(nullptr)
       , shape_gradients(nullptr)
       , shape_hessians(nullptr)
       , n_rows(numbers::invalid_unsigned_int)
       , n_columns(numbers::invalid_unsigned_int)
     {}

     /**
      * Constructor storing pointers to the first entry of each of the given
      * arrays together with the matrix sizes. The data is not copied, so the
      * arrays must outlive the use of this object.
      *
      * @param shape_values    1D shape value data; may be empty.
      * @param shape_gradients 1D shape gradient data; may be empty.
      * @param shape_hessians  1D shape Hessian data; may be empty.
      * @param n_rows          Number of rows of the 1D matrices.
      * @param n_columns       Number of columns of the 1D matrices.
      */
     EvaluatorTensorProduct(const AlignedVector<Number2> &shape_values,
                            const AlignedVector<Number2> &shape_gradients,
                            const AlignedVector<Number2> &shape_hessians,
                            const unsigned int            n_rows,
                            const unsigned int            n_columns)
       : shape_values(shape_values.begin())
       , shape_gradients(shape_gradients.begin())
       , shape_hessians(shape_hessians.begin())
       , n_rows(n_rows)
       , n_columns(n_columns)
     {
       // We can enter this function either for the apply() path that has
       // n_rows * n_columns entries or for the apply_face() path that only has
       // n_rows * 3 entries in the array. Since we cannot decide about the use
       // we must allow for both here.
       Assert(shape_values.size() == 0 ||
                shape_values.size() == n_rows * n_columns ||
                shape_values.size() == n_rows * 3,
              ExcDimensionMismatch(shape_values.size(), n_rows * n_columns));
       Assert(shape_gradients.size() == 0 ||
                shape_gradients.size() == n_rows * n_columns,
              ExcDimensionMismatch(shape_gradients.size(), n_rows * n_columns));
       Assert(shape_hessians.size() == 0 ||
                shape_hessians.size() == n_rows * n_columns,
              ExcDimensionMismatch(shape_hessians.size(), n_rows * n_columns));
     }

     /**
      * Apply the stored shape value matrix along @p direction. See apply()
      * for the meaning of the template parameters.
      */
     template <int direction, bool contract_over_rows, bool add>
     void
     values(const Number *in, Number *out) const
     {
       apply<direction, contract_over_rows, add>(shape_values, in, out);
     }

     /**
      * Apply the stored shape gradient matrix along @p direction. See
      * apply() for the meaning of the template parameters.
      */
     template <int direction, bool contract_over_rows, bool add>
     void
     gradients(const Number *in, Number *out) const
     {
       apply<direction, contract_over_rows, add>(shape_gradients, in, out);
     }

     /**
      * Apply the stored shape Hessian matrix along @p direction. See apply()
      * for the meaning of the template parameters.
      */
     template <int direction, bool contract_over_rows, bool add>
     void
     hessians(const Number *in, Number *out) const
     {
       apply<direction, contract_over_rows, add>(shape_hessians, in, out);
     }

     /**
      * Like values(), but only a single 1D line of data is processed
      * (apply() is called with one_line = true, which requires
      * direction == dim - 1).
      */
     template <int direction, bool contract_over_rows, bool add>
     void
     values_one_line(const Number in[], Number out[]) const
     {
       Assert(shape_values != nullptr, ExcNotInitialized());
       apply<direction, contract_over_rows, add, true>(shape_values, in, out);
     }

     /**
      * Like gradients(), but only a single 1D line of data is processed
      * (apply() is called with one_line = true, which requires
      * direction == dim - 1).
      */
     template <int direction, bool contract_over_rows, bool add>
     void
     gradients_one_line(const Number in[], Number out[]) const
     {
       Assert(shape_gradients != nullptr, ExcNotInitialized());
       apply<direction, contract_over_rows, add, true>(shape_gradients, in, out);
     }

     /**
      * Like hessians(), but only a single 1D line of data is processed
      * (apply() is called with one_line = true, which requires
      * direction == dim - 1).
      */
     template <int direction, bool contract_over_rows, bool add>
     void
     hessians_one_line(const Number in[], Number out[]) const
     {
       Assert(shape_hessians != nullptr, ExcNotInitialized());
       apply<direction, contract_over_rows, add, true>(shape_hessians, in, out);
     }

     /**
      * Kernel used by all functions above: contracts the 1D matrix
      * @p shape_data with the entries of @p in along one coordinate
      * direction and stores the result in @p out. Unlike in the compile-time
      * variant, this function is not static because it reads the members
      * n_rows and n_columns.
      *
      * @tparam direction          Direction along which the matrix is
      *                            applied, 0 <= direction < dim.
      * @tparam contract_over_rows If true, the sum runs over the row index
      *                            of the matrix (n_rows input entries and
      *                            n_columns output entries per line); if
      *                            false, it runs over the column index.
      * @tparam add                If true, the result is added to the
      *                            content of @p out, otherwise @p out is
      *                            overwritten.
      * @tparam one_line           If true, only one 1D line is processed;
      *                            only allowed for direction == dim - 1.
      *
      * @param shape_data Matrix entries, must not be nullptr.
      * @param in         Input data.
      * @param out        Output data.
      */
     template <int  direction,
               bool contract_over_rows,
               bool add,
               bool one_line = false>
     void
     apply(const Number2 *DEAL_II_RESTRICT shape_data,
           const Number *                  in,
           Number *                        out) const;

     /**
      * Kernel relating cell data and data on a face orthogonal to
      * @p face_direction, using the array shape_values, which on this path
      * is read as up to three consecutive blocks of n_rows entries
      * (presumably the value and the first and second derivative of the 1D
      * shape functions on the face).
      *
      * @tparam face_direction     Direction along which the 1D lines are
      *                            contracted or expanded.
      * @tparam contract_onto_face If true, each 1D line of @p in is reduced
      *                            to entries of @p out; if false, entries of
      *                            @p in are expanded into 1D lines of
      *                            @p out.
      * @tparam add                If true, results are added to @p out.
      * @tparam max_derivative     Highest block of shape_values that is
      *                            used.
      */
     template <int  face_direction,
               bool contract_onto_face,
               bool add,
               int  max_derivative>
     void
     apply_face(const Number *DEAL_II_RESTRICT in,
                Number *DEAL_II_RESTRICT out) const;

     // Non-owning pointers to the first entry of the arrays handed to the
     // constructor (nullptr after default construction).
     const Number2 *    shape_values;
     const Number2 *    shape_gradients;
     const Number2 *    shape_hessians;
     // Run-time dimensions of the 1D matrices.
     const unsigned int n_rows;
     const unsigned int n_columns;
   };


   /**
    * Contract the 1D matrix @p shape_data with the data in @p in along the
    * coordinate direction @p direction and store (or, for add == true,
    * accumulate) the result in @p out, using the matrix sizes n_rows and
    * n_columns stored in this object.
    *
    * The matrix is read as shape_data[row * n_columns + column]. For
    * contract_over_rows == true the sum runs over the row index, otherwise
    * over the column index. With one_line == true a single 1D line starting
    * at @p in / @p out is processed and the pointers are never advanced.
    *
    * Each input line is first copied into a scratch array of fixed size, so
    * the length of the contracted index is limited; exceeding the limit
    * triggers ExcNotImplemented in debug mode.
    */
   template <int dim, typename Number, typename Number2>
   template <int direction, bool contract_over_rows, bool add, bool one_line>
   inline void
   EvaluatorTensorProduct<evaluate_general, dim, 0, 0, Number, Number2>::apply(
     const Number2 *DEAL_II_RESTRICT shape_data,
     const Number *                  in,
     Number *                        out) const
   {
     static_assert(one_line == false || direction == dim - 1,
                   "Single-line evaluation only works for direction=dim-1.");
     Assert(shape_data != nullptr,
            ExcMessage(
              "The given array shape_data must not be the null pointer!"));
     Assert(dim == direction + 1 || one_line == true || n_rows == n_columns ||
              in != out,
            ExcMessage("In-place operation only supported for "
                       "n_rows==n_columns or single-line interpolation"));
     AssertIndexRange(direction, dim);
     // mm: length of the index that is summed over, nn: length of the result
     const int mm = contract_over_rows ? n_rows : n_columns,
               nn = contract_over_rows ? n_columns : n_rows;

     const int stride =
       direction == 0 ? 1 : Utilities::fixed_power<direction>(n_columns);
     const int n_blocks1 = one_line ? 1 : stride;
     const int n_blocks2 = direction >= dim - 1 ?
                             1 :
                             Utilities::fixed_power<dim - direction - 1>(n_rows);

     // Capacity of the scratch array that holds one input line.
     constexpr int max_line_length = 129;
     Assert(n_rows <= 128, ExcNotImplemented());
     // The scratch array is filled with mm entries. For contract_over_rows ==
     // false this is n_columns, which the check on n_rows does not cover, so
     // guard the actual number of entries as well.
     Assert(mm <= max_line_length, ExcNotImplemented());

     for (int i2 = 0; i2 < n_blocks2; ++i2)
       {
         for (int i1 = 0; i1 < n_blocks1; ++i1)
           {
             // Gather the complete input line before anything is written, so
             // that in and out may coincide in the cases admitted above.
             Number x[max_line_length];
             for (int i = 0; i < mm; ++i)
               x[i] = in[stride * i];
             for (int col = 0; col < nn; ++col)
               {
                 Number2 val0;
                 if (contract_over_rows == true)
                   val0 = shape_data[col];
                 else
                   val0 = shape_data[col * n_columns];
                 Number res0 = val0 * x[0];
                 for (int i = 1; i < mm; ++i)
                   {
                     if (contract_over_rows == true)
                       val0 = shape_data[i * n_columns + col];
                     else
                       val0 = shape_data[col * n_columns + i];
                     res0 += val0 * x[i];
                   }
                 if (add == false)
                   out[stride * col] = res0;
                 else
                   out[stride * col] += res0;
               }

             // Move on to the neighboring line within the current block.
             if (one_line == false)
               {
                 ++in;
                 ++out;
               }
           }
         // Skip the entries already covered by the strided accesses above to
         // reach the beginning of the next block.
         if (one_line == false)
           {
             in += stride * (mm - 1);
             out += stride * (nn - 1);
           }
       }
   }


   /**
    * Relate data on a cell to data on a face orthogonal to
    * @p face_direction, using the run-time size n_rows stored in this
    * object.
    *
    * The array shape_values is read as up to three consecutive blocks of
    * n_rows entries each (presumably the value and the first and second
    * derivative of the 1D shape functions evaluated on the face); blocks
    * beyond @p max_derivative are not accessed.
    *
    * For contract_onto_face == true, every 1D line of @p in in direction
    * @p face_direction is contracted with each block, and the results are
    * stored in @p out at distances of n_rows^(dim-1). For
    * contract_onto_face == false, the transposed operation is performed:
    * the entries of @p in at distances of n_rows^(dim-1) are multiplied by
    * the blocks and summed into 1D lines of @p out.
    *
    * @tparam face_direction     Direction of the 1D lines, in [0, dim).
    * @tparam contract_onto_face Direction of the operation, see above.
    * @tparam add                If true, results are added to @p out,
    *                            otherwise @p out is overwritten.
    * @tparam max_derivative     Index of the last block used, 0, 1 or 2.
    */
   template <int dim, typename Number, typename Number2>
   template <int  face_direction,
             bool contract_onto_face,
             bool add,
             int  max_derivative>
   inline void
   EvaluatorTensorProduct<evaluate_general, dim, 0, 0, Number, Number2>::
     apply_face(const Number *DEAL_II_RESTRICT in,
                Number *DEAL_II_RESTRICT out) const
   {
     Assert(shape_values != nullptr,
            ExcMessage(
              "The given array shape_values must not be the null pointer."));
     static_assert(dim > 0 && dim < 4, "Only dim=1,2,3 supported");
     // Same restriction as in the variant with compile-time sizes: only the
     // first three blocks of shape_values are ever used below.
     static_assert(max_derivative >= 0 && max_derivative < 3,
                   "Only derivative orders 0-2 implemented");
     // Number of 1D lines in the two directions tangential to the face.
     const int n_blocks1 = dim > 1 ? n_rows : 1;
     const int n_blocks2 = dim > 2 ? n_rows : 1;

     AssertIndexRange(face_direction, dim);
     // Distance between neighboring entries of one 1D line in the cell array
     // and between the blocks of face data belonging to shape_values blocks.
     const int stride =
       face_direction > 0 ? Utilities::fixed_power<face_direction>(n_rows) : 1;
     const int out_stride =
       dim > 1 ? Utilities::fixed_power<dim - 1>(n_rows) : 1;

     for (int i2 = 0; i2 < n_blocks2; ++i2)
       {
         for (int i1 = 0; i1 < n_blocks1; ++i1)
           {
             if (contract_onto_face == true)
               {
                 // res1 and res2 are only initialized and used if the
                 // respective derivative is requested via max_derivative
                 Number res0 = shape_values[0] * in[0];
                 Number res1, res2;
                 if (max_derivative > 0)
                   res1 = shape_values[n_rows] * in[0];
                 if (max_derivative > 1)
                   res2 = shape_values[2 * n_rows] * in[0];
                 for (unsigned int ind = 1; ind < n_rows; ++ind)
                   {
                     res0 += shape_values[ind] * in[stride * ind];
                     if (max_derivative > 0)
                       res1 += shape_values[ind + n_rows] * in[stride * ind];
                     if (max_derivative > 1)
                       res2 += shape_values[ind + 2 * n_rows] * in[stride * ind];
                   }
                 if (add == false)
                   {
                     out[0] = res0;
                     if (max_derivative > 0)
                       out[out_stride] = res1;
                     if (max_derivative > 1)
                       out[2 * out_stride] = res2;
                   }
                 else
                   {
                     out[0] += res0;
                     if (max_derivative > 0)
                       out[out_stride] += res1;
                     if (max_derivative > 1)
                       out[2 * out_stride] += res2;
                   }
               }
             else
               {
                 for (unsigned int col = 0; col < n_rows; ++col)
                   {
                     // the first block either overwrites or accumulates,
                     // the remaining blocks always add to that result
                     if (add == false)
                       out[col * stride] = shape_values[col] * in[0];
                     else
                       out[col * stride] += shape_values[col] * in[0];
                     if (max_derivative > 0)
                       out[col * stride] +=
                         shape_values[col + n_rows] * in[out_stride];
                     if (max_derivative > 1)
                       out[col * stride] +=
                         shape_values[col + 2 * n_rows] * in[2 * out_stride];
                   }
               }

             // increment: in regular case, just go to the next point in
             // x-direction. If we are at the end of one chunk in x-dir, need
             // to jump over to the next layer in z-direction
             switch (face_direction)
               {
                 case 0:
                   in += contract_onto_face ? n_rows : 1;
                   out += contract_onto_face ? 1 : n_rows;
                   break;
                 case 1:
                   ++in;
                   ++out;
                   // faces 2 and 3 in 3D use local coordinate system zx, which
                   // is the other way around compared to the tensor
                   // product. Need to take that into account.
                   if (dim == 3)
                     {
                       if (contract_onto_face)
                         out += n_rows - 1;
                       else
                         in += n_rows - 1;
                     }
                   break;
                 case 2:
                   ++in;
                   ++out;
                   break;
                 default:
                   Assert(false, ExcNotImplemented());
               }
           }
         if (face_direction == 1 && dim == 3)
           {
             // adjust for local coordinate system zx
             if (contract_onto_face)
               {
                 in += n_rows * (n_rows - 1);
                 out -= n_rows * n_rows - 1;
               }
             else
               {
                 out += n_rows * (n_rows - 1);
                 in -= n_rows * n_rows - 1;
               }
           }
       }
   }


   /**
    * Tensor-product evaluator for the evaluate_symmetric variant with matrix
    * dimensions known at compile time.
    *
    * The object stores raw pointers to three arrays of 1D shape data
    * (values, gradients and Hessians). The member functions are only
    * declared here and defined out of class. Unlike the evaluate_general
    * specializations, this struct declares no default constructor.
    */
   template <int dim,
             int n_rows,
             int n_columns,
             typename Number,
             typename Number2>
   struct EvaluatorTensorProduct<evaluate_symmetric,
                                 dim,
                                 n_rows,
                                 n_columns,
                                 Number,
                                 Number2>
   {
     /**
      * n_rows raised to the power dim.
      */
     static constexpr unsigned int n_rows_of_product =
       Utilities::pow(n_rows, dim);
     /**
      * n_columns raised to the power dim.
      */
     static constexpr unsigned int n_columns_of_product =
       Utilities::pow(n_columns, dim);

     /**
      * Constructor storing pointers to the first entry of each of the given
      * arrays. The data is not copied, so the arrays must outlive the use of
      * this object. In debug mode, each array must either be empty or hold
      * exactly n_rows * n_columns entries.
      *
      * @param shape_values    1D shape value data.
      * @param shape_gradients 1D shape gradient data.
      * @param shape_hessians  1D shape Hessian data.
      * @param dummy1          Unused.
      * @param dummy2          Unused.
      */
     EvaluatorTensorProduct(const AlignedVector<Number2> &shape_values,
                            const AlignedVector<Number2> &shape_gradients,
                            const AlignedVector<Number2> &shape_hessians,
                            const unsigned int            dummy1 = 0,
                            const unsigned int            dummy2 = 0)
       : shape_values(shape_values.begin())
       , shape_gradients(shape_gradients.begin())
       , shape_hessians(shape_hessians.begin())
     {
       Assert(shape_values.size() == 0 ||
                shape_values.size() == n_rows * n_columns,
              ExcDimensionMismatch(shape_values.size(), n_rows * n_columns));
       Assert(shape_gradients.size() == 0 ||
                shape_gradients.size() == n_rows * n_columns,
              ExcDimensionMismatch(shape_gradients.size(), n_rows * n_columns));
       Assert(shape_hessians.size() == 0 ||
                shape_hessians.size() == n_rows * n_columns,
              ExcDimensionMismatch(shape_hessians.size(), n_rows * n_columns));
       // Silence unused-parameter warnings; the arguments carry no
       // information in this specialization.
       (void)dummy1;
       (void)dummy2;
     }

     /**
      * Apply the shape value matrix along @p direction, reading from @p in
      * and writing to (add == false) or accumulating into (add == true)
      * @p out. If contract_over_rows is true, the sum runs over the row
      * index of the matrix, otherwise over the column index.
      */
     template <int direction, bool contract_over_rows, bool add>
     void
     values(const Number in[], Number out[]) const;

     /**
      * Counterpart of values() for the shape gradient matrix.
      * NOTE(review): presumably the template parameters have the same
      * meaning as in values() -- confirm against the definition.
      */
     template <int direction, bool contract_over_rows, bool add>
     void
     gradients(const Number in[], Number out[]) const;

     /**
      * Counterpart of values() for the shape Hessian matrix.
      * NOTE(review): presumably the template parameters have the same
      * meaning as in values() -- confirm against the definition.
      */
     template <int direction, bool contract_over_rows, bool add>
     void
     hessians(const Number in[], Number out[]) const;

     // Non-owning pointers to the first entry of the arrays handed to the
     // constructor.
     const Number2 *shape_values;
     const Number2 *shape_gradients;
     const Number2 *shape_hessians;
   };


   // In this case, the 1D shape values read (sorted lexicographically, rows
   // run over 1D dofs, columns over quadrature points):
   // Q2 --> [ 0.687  0 -0.087 ]
   //        [ 0.4    1  0.4   ]
   //        [-0.087  0  0.687 ]
   // Q3 --> [ 0.66   0.003  0.002  0.049 ]
   //        [ 0.521  1.005 -0.01  -0.230 ]
   //        [-0.230 -0.01   1.005  0.521 ]
   //        [ 0.049  0.002  0.003  0.66  ]
   // Q4 --> [ 0.658  0.022  0 -0.007 -0.032 ]
   //        [ 0.608  1.059  0  0.039  0.176 ]
   //        [-0.409 -0.113  1 -0.113 -0.409 ]
   //        [ 0.176  0.039  0  1.059  0.608 ]
   //        [-0.032 -0.007  0  0.022  0.658 ]
   //
   // In these matrices, we want to avoid computations involving zeros and
   // ones and, in addition, use the symmetry in the entries to reduce the
   // number of read operations.
   template <int dim,
             int n_rows,
             int n_columns,
             typename Number,
             typename Number2>
   template <int direction, bool contract_over_rows, bool add>
   inline void
   EvaluatorTensorProduct<evaluate_symmetric,
                          dim,
                          n_rows,
                          n_columns,
                          Number,
                          Number2>::values(const Number in[], Number out[]) const
   {
     Assert(shape_values != nullptr, ExcNotInitialized());
     AssertIndexRange(direction, dim);
     constexpr int mm     = contract_over_rows ? n_rows : n_columns,
                   nn     = contract_over_rows ? n_columns : n_rows;
     constexpr int n_cols = nn / 2;
     constexpr int mid    = mm / 2;

     constexpr int stride    = Utilities::pow(n_columns, direction);
     constexpr int n_blocks1 = stride;
     constexpr int n_blocks2 =
       Utilities::pow(n_rows, (direction >= dim) ? 0 : (dim - direction - 1));

     for (int i2 = 0; i2 < n_blocks2; ++i2)
       {
         for (int i1 = 0; i1 < n_blocks1; ++i1)
           {
             for (int col = 0; col < n_cols; ++col)
               {
                 Number2 val0, val1;
                 Number  in0, in1, res0, res1;
                 if (contract_over_rows == true)
                   {
                     val0 = shape_values[col];
                     val1 = shape_values[nn - 1 - col];
                   }
                 else
                   {
                     val0 = shape_values[col * n_columns];
                     val1 = shape_values[(col + 1) * n_columns - 1];
                   }
                 if (mid > 0)
                   {
                     in0  = in[0];
                     in1  = in[stride * (mm - 1)];
                     res0 = val0 * in0;
                     res1 = val1 * in0;
                     res0 += val1 * in1;
                     res1 += val0 * in1;
                     for (int ind = 1; ind < mid; ++ind)
                       {
                         if (contract_over_rows == true)
                           {
                             val0 = shape_values[ind * n_columns + col];
                             val1 = shape_values[ind * n_columns + nn - 1 - col];
                           }
                         else
                           {
                             val0 = shape_values[col * n_columns + ind];
                             val1 =
                               shape_values[(col + 1) * n_columns - 1 - ind];
                           }
                         in0 = in[stride * ind];
                         in1 = in[stride * (mm - 1 - ind)];
                         res0 += val0 * in0;
                         res1 += val1 * in0;
                         res0 += val1 * in1;
                         res1 += val0 * in1;
                       }
                   }
                 else
                   res0 = res1 = Number();
                 if (contract_over_rows == true)
                   {
                     if (mm % 2 == 1)
                       {
                         val0 = shape_values[mid * n_columns + col];
                         in1  = val0 * in[stride * mid];
                         res0 += in1;
                         res1 += in1;
                       }
                   }
                 else
                   {
                     if (mm % 2 == 1 && nn % 2 == 0)
                       {
                         val0 = shape_values[col * n_columns + mid];
                         in1  = val0 * in[stride * mid];
                         res0 += in1;
                         res1 += in1;
                       }
                   }
                 if (add == false)
                   {
                     out[stride * col]            = res0;
                     out[stride * (nn - 1 - col)] = res1;
                   }
                 else
                   {
                     out[stride * col] += res0;
                     out[stride * (nn - 1 - col)] += res1;
                   }
               }
             if (contract_over_rows == true && nn % 2 == 1 && mm % 2 == 1)
               {
                 if (add == false)
                   out[stride * n_cols] = in[stride * mid];
                 else
                   out[stride * n_cols] += in[stride * mid];
               }
             else if (contract_over_rows == true && nn % 2 == 1)
               {
                 Number  res0;
                 Number2 val0 = shape_values[n_cols];
                 if (mid > 0)
                   {
                     res0 = val0 * (in[0] + in[stride * (mm - 1)]);
                     for (int ind = 1; ind < mid; ++ind)
                       {
                         val0 = shape_values[ind * n_columns + n_cols];
                         res0 += val0 * (in[stride * ind] +
                                         in[stride * (mm - 1 - ind)]);
                       }
                   }
                 else
                   res0 = Number();
                 if (add == false)
                   out[stride * n_cols] = res0;
                 else
                   out[stride * n_cols] += res0;
               }
             else if (contract_over_rows == false && nn % 2 == 1)
               {
                 Number res0;
                 if (mid > 0)
                   {
                     Number2 val0 = shape_values[n_cols * n_columns];
                     res0         = val0 * (in[0] + in[stride * (mm - 1)]);
                     for (int ind = 1; ind < mid; ++ind)
                       {
                         val0       = shape_values[n_cols * n_columns + ind];
                         Number in1 = val0 * (in[stride * ind] +
                                              in[stride * (mm - 1 - ind)]);
                         res0 += in1;
                       }
                     if (mm % 2)
                       res0 += in[stride * mid];
                   }
                 else
                   res0 = in[0];
                 if (add == false)
                   out[stride * n_cols] = res0;
                 else
                   out[stride * n_cols] += res0;
               }

             ++in;
             ++out;
           }
         in += stride * (mm - 1);
         out += stride * (nn - 1);
       }
   }


   // For the specialized loop used for the gradient computation in
   // here, the 1D shape values read (sorted lexicographically, rows
   // run over 1D dofs, columns over quadrature points):
   // Q2 --> [-2.549 -1  0.549 ]
   //        [ 3.098  0 -3.098 ]
   //        [-0.549  1  2.549 ]
   // Q3 --> [-4.315 -1.03  0.5  -0.44  ]
   //        [ 6.07  -1.44 -2.97  2.196 ]
   //        [-2.196  2.97  1.44 -6.07  ]
   //        [ 0.44  -0.5   1.03  4.315 ]
   // Q4 --> [-6.316 -1.3    0.333 -0.353  0.413 ]
   //        [10.111 -2.76  -2.667  2.066 -2.306 ]
   //        [-5.688  5.773  0     -5.773  5.688 ]
   //        [ 2.306 -2.066  2.667  2.76 -10.111 ]
   //        [-0.413  0.353 -0.333  1.3    6.316 ]
   //
   // In these matrices, we want to avoid computations involving
   // zeros and ones and, in addition, use the symmetry in entries to
   // reduce the number of read operations.
   template <int dim,
             int n_rows,
             int n_columns,
             typename Number,
             typename Number2>
   template <int direction, bool contract_over_rows, bool add>
   inline void
   EvaluatorTensorProduct<evaluate_symmetric,
                          dim,
                          n_rows,
                          n_columns,
                          Number,
                          Number2>::gradients(const Number in[],
                                              Number       out[]) const
   {
     Assert(shape_gradients != nullptr, ExcNotInitialized());
     AssertIndexRange(direction, dim);

     // mm entries are summed over along each 1D line and nn entries are
     // written; which of n_rows/n_columns plays which role depends on the
     // contraction direction
     constexpr int mm     = contract_over_rows ? n_rows : n_columns;
     constexpr int nn     = contract_over_rows ? n_columns : n_rows;
     constexpr int n_cols = nn / 2;
     constexpr int mid    = mm / 2;

     constexpr int stride    = Utilities::pow(n_columns, direction);
     constexpr int n_blocks1 = stride;
     constexpr int n_blocks2 =
       Utilities::pow(n_rows, (direction >= dim) ? 0 : (dim - direction - 1));

     // shape_gradients is addressed as [row * n_columns + column]. Advancing
     // the summation index by one thus moves one row further when
     // contracting over rows, and one column further otherwise.
     constexpr int shape_step = contract_over_rows ? n_columns : 1;

     for (int i2 = 0; i2 < n_blocks2; ++i2)
       {
         for (int i1 = 0; i1 < n_blocks1; ++i1)
           {
             // Results are produced in pairs (col, nn - 1 - col). Only
             // matrix entries with summation index <= mid are read; the
             // contribution of the mirrored input entry reuses the
             // partner's coefficient with the opposite sign.
             for (int col = 0; col < n_cols; ++col)
               {
                 // positions of the coefficients for summation index 0
                 const int first0 =
                   contract_over_rows ? col : col * n_columns;
                 const int first1 = contract_over_rows ?
                                      nn - 1 - col :
                                      (nn - col - 1) * n_columns;

                 Number2 coef0 = shape_gradients[first0];
                 Number2 coef1 = shape_gradients[first1];
                 Number  sum0, sum1;
                 if (mid > 0)
                   {
                     // the first pair of inputs initializes the sums
                     Number front = in[0];
                     Number back  = in[stride * (mm - 1)];
                     sum0         = coef0 * front;
                     sum1         = coef1 * front;
                     sum0 -= coef1 * back;
                     sum1 -= coef0 * back;
                     for (int ind = 1; ind < mid; ++ind)
                       {
                         coef0 = shape_gradients[first0 + ind * shape_step];
                         coef1 = shape_gradients[first1 + ind * shape_step];
                         front = in[stride * ind];
                         back  = in[stride * (mm - 1 - ind)];
                         sum0 += coef0 * front;
                         sum1 += coef1 * front;
                         sum0 -= coef1 * back;
                         sum1 -= coef0 * back;
                       }
                   }
                 else
                   sum0 = sum1 = Number();

                 // for an odd number of inputs, the central one has no
                 // partner and enters the two results with opposite signs
                 if (mm % 2 == 1)
                   {
                     coef0 = shape_gradients[first0 + mid * shape_step];
                     const Number center_part = coef0 * in[stride * mid];
                     sum0 += center_part;
                     sum1 -= center_part;
                   }

                 if (add)
                   {
                     out[stride * col] += sum0;
                     out[stride * (nn - 1 - col)] += sum1;
                   }
                 else
                   {
                     out[stride * col]            = sum0;
                     out[stride * (nn - 1 - col)] = sum1;
                   }
               }

             // For an odd number of results, the central one is computed
             // from the differences of mirrored inputs; the central input
             // itself is not used here.
             if (nn % 2 == 1)
               {
                 const int first =
                   contract_over_rows ? n_cols : n_cols * n_columns;

                 Number2 coef = shape_gradients[first];
                 Number  sum  = coef * (in[0] - in[stride * (mm - 1)]);
                 for (int ind = 1; ind < mid; ++ind)
                   {
                     coef = shape_gradients[first + ind * shape_step];
                     const Number term =
                       coef * (in[stride * ind] - in[stride * (mm - 1 - ind)]);
                     sum += term;
                   }

                 if (add)
                   out[stride * n_cols] += sum;
                 else
                   out[stride * n_cols] = sum;
               }

             // move on to the next 1D line within the current block
             ++in;
             ++out;
           }

         // skip the entries already covered by the strided accesses above
         in += stride * (mm - 1);
         out += stride * (nn - 1);
       }
   }


   // Applies the 1D matrix shape_hessians along coordinate 'direction' of a
   // tensor product data field (the comment in the original source states
   // 1d-3d usage within FEEvaluation). The kernel exploits that the matrix
   // is treated as symmetric about its center: the coefficient for a
   // mirrored input/output pair is taken from the mirrored position, so
   // results are formed in pairs (col, nn - 1 - col).
   template <int dim,
             int n_rows,
             int n_columns,
             typename Number,
             typename Number2>
   template <int direction, bool contract_over_rows, bool add>
   inline void
   EvaluatorTensorProduct<evaluate_symmetric,
                          dim,
                          n_rows,
                          n_columns,
                          Number,
                          Number2>::hessians(const Number in[],
                                             Number       out[]) const
   {
     Assert(shape_hessians != nullptr, ExcNotInitialized());
     AssertIndexRange(direction, dim);

     // mm inputs are summed over per 1D line, nn results are written
     constexpr int mm     = contract_over_rows ? n_rows : n_columns;
     constexpr int nn     = contract_over_rows ? n_columns : n_rows;
     constexpr int n_cols = nn / 2;
     constexpr int mid    = mm / 2;

     constexpr int stride    = Utilities::pow(n_columns, direction);
     constexpr int n_blocks1 = stride;
     constexpr int n_blocks2 =
       Utilities::pow(n_rows, (direction >= dim) ? 0 : (dim - direction - 1));

     // shape_hessians is addressed as [row * n_columns + column]. The first
     // coefficient of a pair advances by one row (contraction over rows) or
     // one column (otherwise) per summation index. The second coefficient
     // advances by one row as well when contracting over rows, but walks
     // backwards from the end of the same row otherwise.
     constexpr int step_fwd = contract_over_rows ? n_columns : 1;
     constexpr int step_mir = contract_over_rows ? n_columns : -1;

     for (int i2 = 0; i2 < n_blocks2; ++i2)
       {
         for (int i1 = 0; i1 < n_blocks1; ++i1)
           {
             for (int col = 0; col < n_cols; ++col)
               {
                 // positions of the coefficients for summation index 0
                 const int start_fwd =
                   contract_over_rows ? col : col * n_columns;
                 const int start_mir = contract_over_rows ?
                                         nn - 1 - col :
                                         (col + 1) * n_columns - 1;

                 Number2 weight_fwd = shape_hessians[start_fwd];
                 Number2 weight_mir = shape_hessians[start_mir];
                 Number  acc_lo, acc_hi;
                 if (mid > 0)
                   {
                     // the outermost input pair initializes both sums
                     Number x_lo = in[0];
                     Number x_hi = in[stride * (mm - 1)];
                     acc_lo      = weight_fwd * x_lo;
                     acc_hi      = weight_mir * x_lo;
                     acc_lo += weight_mir * x_hi;
                     acc_hi += weight_fwd * x_hi;
                     for (int ind = 1; ind < mid; ++ind)
                       {
                         weight_fwd =
                           shape_hessians[start_fwd + ind * step_fwd];
                         weight_mir =
                           shape_hessians[start_mir + ind * step_mir];
                         x_lo = in[stride * ind];
                         x_hi = in[stride * (mm - 1 - ind)];
                         acc_lo += weight_fwd * x_lo;
                         acc_hi += weight_mir * x_lo;
                         acc_lo += weight_mir * x_hi;
                         acc_hi += weight_fwd * x_hi;
                       }
                   }
                 else
                   acc_lo = acc_hi = Number();

                 // the unpaired central input contributes equally to both
                 // results of the pair
                 if (mm % 2 == 1)
                   {
                     weight_fwd = shape_hessians[start_fwd + mid * step_fwd];
                     const Number center_part = weight_fwd * in[stride * mid];
                     acc_lo += center_part;
                     acc_hi += center_part;
                   }

                 if (add)
                   {
                     out[stride * col] += acc_lo;
                     out[stride * (nn - 1 - col)] += acc_hi;
                   }
                 else
                   {
                     out[stride * col]            = acc_lo;
                     out[stride * (nn - 1 - col)] = acc_hi;
                   }
               }

             // central result for an odd number of results: mirrored inputs
             // share one coefficient and are added before multiplying
             if (nn % 2 == 1)
               {
                 const int start =
                   contract_over_rows ? n_cols : n_cols * n_columns;

                 Number2 weight = shape_hessians[start];
                 Number  acc;
                 if (mid > 0)
                   {
                     acc = weight * (in[0] + in[stride * (mm - 1)]);
                     for (int ind = 1; ind < mid; ++ind)
                       {
                         weight = shape_hessians[start + ind * step_fwd];
                         const Number term =
                           weight *
                           (in[stride * ind] + in[stride * (mm - 1 - ind)]);
                         acc += term;
                       }
                   }
                 else
                   acc = Number();

                 // central input times central coefficient
                 if (mm % 2 == 1)
                   {
                     weight = shape_hessians[start + mid * step_fwd];
                     acc += weight * in[stride * mid];
                   }

                 if (add)
                   out[stride * n_cols] += acc;
                 else
                   out[stride * n_cols] = acc;
               }

             // move on to the next 1D line within the current block
             ++in;
             ++out;
           }

         // skip the entries already covered by the strided accesses above
         in += stride * (mm - 1);
         out += stride * (nn - 1);
       }
   }


   // Specialization of EvaluatorTensorProduct for the evaluate_evenodd
   // variant. The object does not copy any shape data: it only stores the
   // pointers returned by begin() of the vectors handed to the
   // constructors. Non-empty shape arrays are expected to hold
   // n_rows * ((n_columns + 1) / 2) entries (checked by the assertions in
   // the constructors). All kernels are implemented by the static function
   // apply(); the member functions below merely select the array and the
   // 'type' argument (0 for values, 1 for gradients, 2 for hessians).
   template <int dim,
             int n_rows,
             int n_columns,
             typename Number,
             typename Number2>
   struct EvaluatorTensorProduct<evaluate_evenodd,
                                 dim,
                                 n_rows,
                                 n_columns,
                                 Number,
                                 Number2>
   {
     // n_rows^dim and n_columns^dim, respectively
     static constexpr unsigned int n_rows_of_product =
       Utilities::pow(n_rows, dim);
     static constexpr unsigned int n_columns_of_product =
       Utilities::pow(n_columns, dim);

     // Creates an evaluator without any shape data; values(), gradients()
     // and hessians() assert that the respective pointer is non-null.
     EvaluatorTensorProduct()
       : shape_values(nullptr)
       , shape_gradients(nullptr)
       , shape_hessians(nullptr)
     {}

     // Creates an evaluator that only provides shape values; gradient and
     // hessian pointers stay null.
     EvaluatorTensorProduct(const AlignedVector<Number2> &shape_values)
       : shape_values(shape_values.begin())
       , shape_gradients(nullptr)
       , shape_hessians(nullptr)
     {
       AssertDimension(shape_values.size(), n_rows * ((n_columns + 1) / 2));
     }

     // Creates an evaluator from three shape arrays. The size of an array is
     // only checked if it is non-empty. dummy1 and dummy2 are not used by
     // this specialization.
     EvaluatorTensorProduct(const AlignedVector<Number2> &shape_values,
                            const AlignedVector<Number2> &shape_gradients,
                            const AlignedVector<Number2> &shape_hessians,
                            const unsigned int            dummy1 = 0,
                            const unsigned int            dummy2 = 0)
       : shape_values(shape_values.begin())
       , shape_gradients(shape_gradients.begin())
       , shape_hessians(shape_hessians.begin())
     {
       // In this function, we allow for dummy pointers if some of values,
       // gradients or hessians should not be computed
       if (!shape_values.empty())
         AssertDimension(shape_values.size(), n_rows * ((n_columns + 1) / 2));
       if (!shape_gradients.empty())
         AssertDimension(shape_gradients.size(), n_rows * ((n_columns + 1) / 2));
       if (!shape_hessians.empty())
         AssertDimension(shape_hessians.size(), n_rows * ((n_columns + 1) / 2));
       (void)dummy1;
       (void)dummy2;
     }

     // Applies shape_values along 'direction' (apply() with type 0). With
     // add == true the result is added to out, otherwise out is overwritten.
     template <int direction, bool contract_over_rows, bool add>
     void
     values(const Number in[], Number out[]) const
     {
       Assert(shape_values != nullptr, ExcNotInitialized());
       apply<direction, contract_over_rows, add, 0>(shape_values, in, out);
     }

     // Applies shape_gradients along 'direction' (apply() with type 1).
     template <int direction, bool contract_over_rows, bool add>
     void
     gradients(const Number in[], Number out[]) const
     {
       Assert(shape_gradients != nullptr, ExcNotInitialized());
       apply<direction, contract_over_rows, add, 1>(shape_gradients, in, out);
     }

     // Applies shape_hessians along 'direction' (apply() with type 2).
     template <int direction, bool contract_over_rows, bool add>
     void
     hessians(const Number in[], Number out[]) const
     {
       Assert(shape_hessians != nullptr, ExcNotInitialized());
       apply<direction, contract_over_rows, add, 2>(shape_hessians, in, out);
     }

     // Same as values(), but with one_line == true; apply() statically
     // requires direction == dim - 1 in that mode.
     template <int direction, bool contract_over_rows, bool add>
     void
     values_one_line(const Number in[], Number out[]) const
     {
       Assert(shape_values != nullptr, ExcNotInitialized());
       apply<direction, contract_over_rows, add, 0, true>(shape_values, in, out);
     }

     // Same as gradients(), but with one_line == true.
     template <int direction, bool contract_over_rows, bool add>
     void
     gradients_one_line(const Number in[], Number out[]) const
     {
       Assert(shape_gradients != nullptr, ExcNotInitialized());
       apply<direction, contract_over_rows, add, 1, true>(shape_gradients,
                                                          in,
                                                          out);
     }

     // Same as hessians(), but with one_line == true.
     template <int direction, bool contract_over_rows, bool add>
     void
     hessians_one_line(const Number in[], Number out[]) const
     {
       Assert(shape_hessians != nullptr, ExcNotInitialized());
       apply<direction, contract_over_rows, add, 2, true>(shape_hessians,
                                                          in,
                                                          out);
     }

     // Kernel shared by all functions above. It is static and receives the
     // shape array explicitly, so it does not read the pointers stored in
     // this object. 'type' must be 0, 1 or 2.
     template <int  direction,
               bool contract_over_rows,
               bool add,
               int  type,
               bool one_line = false>
     static void
     apply(const Number2 *DEAL_II_RESTRICT shape_data,
           const Number *                  in,
           Number *                        out);

     // Non-owning pointers to the shape arrays (nullptr if not provided)
     const Number2 *shape_values;
     const Number2 *shape_gradients;
     const Number2 *shape_hessians;
   };


   // Applies the 1D kernel stored in the array 'shapes' along coordinate
   // 'direction' of the data in 'in' and writes (add == false) or
   // accumulates (add == true) the result into 'out'.
   //
   // - contract_over_rows: if true, n_rows input entries are summed over
   //   and n_columns results are written per 1D line, otherwise the roles
   //   of n_rows and n_columns are exchanged.
   // - type: 0, 1 or 2; the callers in this struct pass 0 for values, 1
   //   for gradients and 2 for hessians. Type 1 uses different signs than
   //   types 0 and 2 when combining the two partial sums.
   // - one_line: process a single 1D line only; 'in' and 'out' are then
   //   not advanced.
   //
   // 'shapes' is addressed as [row * offset + column] with
   // offset = (n_columns + 1) / 2.
   template <int dim,
             int n_rows,
             int n_columns,
             typename Number,
             typename Number2>
   template <int  direction,
             bool contract_over_rows,
             bool add,
             int  type,
             bool one_line>
   inline void
   EvaluatorTensorProduct<evaluate_evenodd,
                          dim,
                          n_rows,
                          n_columns,
                          Number,
                          Number2>::apply(const Number2 *DEAL_II_RESTRICT shapes,
                                          const Number *                  in,
                                          Number *                        out)
   {
     static_assert(type < 3, "Only three variants type=0,1,2 implemented");
     static_assert(one_line == false || direction == dim - 1,
                   "Single-line evaluation only works for direction=dim-1.");
     Assert(dim == direction + 1 || one_line == true || n_rows == n_columns ||
              in != out,
            ExcMessage("In-place operation only supported for "
                       "n_rows==n_columns or single-line interpolation"));

     // We cannot statically assert that direction is less than dim, so must do
     // an additional dynamic check
     AssertIndexRange(direction, dim);

     // nn results are written and mm inputs are summed over per 1D line
     constexpr int nn     = contract_over_rows ? n_columns : n_rows;
     constexpr int mm     = contract_over_rows ? n_rows : n_columns;
     constexpr int n_cols = nn / 2;
     constexpr int mid    = mm / 2;

     constexpr int stride    = Utilities::pow(n_columns, direction);
     constexpr int n_blocks1 = one_line ? 1 : stride;
     constexpr int n_blocks2 =
       Utilities::pow(n_rows, (direction >= dim) ? 0 : (dim - direction - 1));

     // row length of the shape array
     constexpr int offset = (n_columns + 1) / 2;

     // this code may look very inefficient at first sight due to the many
     // different cases with if's at the innermost loop part, but all of the
     // conditionals can be evaluated at compile time because they are
     // templates, so the compiler should optimize everything away
     for (int i2 = 0; i2 < n_blocks2; ++i2)
       {
         for (int i1 = 0; i1 < n_blocks1; ++i1)
           {
             // xp/xm hold the sum/difference of each input entry and its
             // mirror image about the center of the line; for type 1 with
             // contraction over rows the two roles are exchanged. The
             // arrays get length 1 if mid == 0 to avoid zero-sized arrays.
             Number xp[mid > 0 ? mid : 1], xm[mid > 0 ? mid : 1];
             for (int i = 0; i < mid; ++i)
               {
                 if (contract_over_rows == true && type == 1)
                   {
                     xp[i] = in[stride * i] - in[stride * (mm - 1 - i)];
                     xm[i] = in[stride * i] + in[stride * (mm - 1 - i)];
                   }
                 else
                   {
                     xp[i] = in[stride * i] + in[stride * (mm - 1 - i)];
                     xm[i] = in[stride * i] - in[stride * (mm - 1 - i)];
                   }
               }
             // central input entry: read unconditionally, but only used in
             // branches guarded by mm % 2 == 1
             Number xmid = in[stride * mid];

             // Results are produced in pairs (col, nn - 1 - col): r0 is the
             // partial sum over xp, r1 the partial sum over xm. r0 reads
             // shape rows counted from the first row, r1 reads shape rows
             // counted backwards from row n_rows - 1.
             for (int col = 0; col < n_cols; ++col)
               {
                 Number r0, r1;
                 if (mid > 0)
                   {
                     if (contract_over_rows == true)
                       {
                         r0 = shapes[col] * xp[0];
                         r1 = shapes[(n_rows - 1) * offset + col] * xm[0];
                       }
                     else
                       {
                         r0 = shapes[col * offset] * xp[0];
                         r1 = shapes[(n_rows - 1 - col) * offset] * xm[0];
                       }
                     for (int ind = 1; ind < mid; ++ind)
                       {
                         if (contract_over_rows == true)
                           {
                             r0 += shapes[ind * offset + col] * xp[ind];
                             r1 += shapes[(n_rows - 1 - ind) * offset + col] *
                                   xm[ind];
                           }
                         else
                           {
                             r0 += shapes[col * offset + ind] * xp[ind];
                             r1 += shapes[(n_rows - 1 - col) * offset + ind] *
                                   xm[ind];
                           }
                       }
                   }
                 else
                   r0 = r1 = Number();
                 // contribution of the central input for odd mm: it goes
                 // into r1 for type 1 with contraction over rows and into r0
                 // in the other handled cases. Without contraction over
                 // rows, type 0 with odd nn adds nothing here.
                 if (mm % 2 == 1 && contract_over_rows == true)
                   {
                     if (type == 1)
                       r1 += shapes[mid * offset + col] * xmid;
                     else
                       r0 += shapes[mid * offset + col] * xmid;
                   }
                 else if (mm % 2 == 1 && (nn % 2 == 0 || type > 0))
                   r0 += shapes[col * offset + mid] * xmid;

                 // out[col] receives r0 + r1; the mirrored entry receives
                 // r0 - r1, with the sign flipped for type 1 without
                 // contraction over rows
                 if (add == false)
                   {
                     out[stride * col] = r0 + r1;
                     if (type == 1 && contract_over_rows == false)
                       out[stride * (nn - 1 - col)] = r1 - r0;
                     else
                       out[stride * (nn - 1 - col)] = r0 - r1;
                   }
                 else
                   {
                     out[stride * col] += r0 + r1;
                     if (type == 1 && contract_over_rows == false)
                       out[stride * (nn - 1 - col)] += r1 - r0;
                     else
                       out[stride * (nn - 1 - col)] += r0 - r1;
                   }
               }

             // Central result out[n_cols], only present for odd nn. Three
             // mutually exclusive cases follow.
             if (type == 0 && contract_over_rows == true && nn % 2 == 1 &&
                 mm % 2 == 1)
               {
                 // type 0 with odd mm and nn: only the central input times
                 // the central shape entry is used
                 if (add == false)
                   out[stride * n_cols] = shapes[mid * offset + n_cols] * xmid;
                 else
                   out[stride * n_cols] += shapes[mid * offset + n_cols] * xmid;
               }
             else if (contract_over_rows == true && nn % 2 == 1)
               {
                 // contraction over rows: sum over xp with column n_cols of
                 // the shape array, plus the central input except for type 1
                 Number r0;
                 if (mid > 0)
                   {
                     r0 = shapes[n_cols] * xp[0];
                     for (int ind = 1; ind < mid; ++ind)
                       r0 += shapes[ind * offset + n_cols] * xp[ind];
                   }
                 else
                   r0 = Number();
                 if (type != 1 && mm % 2 == 1)
                   r0 += shapes[mid * offset + n_cols] * xmid;

                 if (add == false)
                   out[stride * n_cols] = r0;
                 else
                   out[stride * n_cols] += r0;
               }
             else if (contract_over_rows == false && nn % 2 == 1)
               {
                 // no contraction over rows: row n_cols of the shape array
                 // is combined with xm for type 1 and with xp otherwise
                 Number r0;
                 if (mid > 0)
                   {
                     if (type == 1)
                       {
                         r0 = shapes[n_cols * offset] * xm[0];
                         for (int ind = 1; ind < mid; ++ind)
                           r0 += shapes[n_cols * offset + ind] * xm[ind];
                       }
                     else
                       {
                         r0 = shapes[n_cols * offset] * xp[0];
                         for (int ind = 1; ind < mid; ++ind)
                           r0 += shapes[n_cols * offset + ind] * xp[ind];
                       }
                   }
                 else
                   r0 = Number();

                 // the central input only contributes for types 0 and 2
                 if ((type == 0 || type == 2) && mm % 2 == 1)
                   r0 += shapes[n_cols * offset + mid] * xmid;

                 if (add == false)
                   out[stride * n_cols] = r0;
                 else
                   out[stride * n_cols] += r0;
               }
             // advance to the next 1D line of the current block (not done
             // in single-line mode)
             if (one_line == false)
               {
                 in += 1;
                 out += 1;
               }
           }
         // skip the entries already covered by the strided accesses above
         if (one_line == false)
           {
             in += stride * (mm - 1);
             out += stride * (nn - 1);
           }
       }
   }


   // Specialization of EvaluatorTensorProduct for the
   // evaluate_symmetric_hierarchical variant. The object does not copy any
   // shape data: it only stores the pointers returned by begin() of the
   // vectors handed to the constructors. All kernels are implemented by the
   // static function apply(), which supports the two variants type == 0 and
   // type == 1 only.
   template <int dim,
             int n_rows,
             int n_columns,
             typename Number,
             typename Number2>
   struct EvaluatorTensorProduct<evaluate_symmetric_hierarchical,
                                 dim,
                                 n_rows,
                                 n_columns,
                                 Number,
                                 Number2>
   {
     // n_rows^dim and n_columns^dim, respectively
     static constexpr unsigned int n_rows_of_product =
       Utilities::pow(n_rows, dim);
     static constexpr unsigned int n_columns_of_product =
       Utilities::pow(n_columns, dim);

     // Creates an evaluator without any shape data; values(), gradients()
     // and hessians() assert that the respective pointer is non-null.
     EvaluatorTensorProduct()
       : shape_values(nullptr)
       , shape_gradients(nullptr)
       , shape_hessians(nullptr)
     {}

     // Creates an evaluator that only provides shape values; gradient and
     // hessian pointers stay null. The shape data is of type Number2, like
     // the stored pointer and like the arguments of the constructor below
     // (a vector of Number could not initialize the const Number2 * member
     // whenever the two types differ).
     EvaluatorTensorProduct(const AlignedVector<Number2> &shape_values)
       : shape_values(shape_values.begin())
       , shape_gradients(nullptr)
       , shape_hessians(nullptr)
     {}

     // Creates an evaluator from three shape arrays. No size checks are
     // performed. dummy1 and dummy2 are not used by this specialization.
     EvaluatorTensorProduct(const AlignedVector<Number2> &shape_values,
                            const AlignedVector<Number2> &shape_gradients,
                            const AlignedVector<Number2> &shape_hessians,
                            const unsigned int            dummy1 = 0,
                            const unsigned int            dummy2 = 0)
       : shape_values(shape_values.begin())
       , shape_gradients(shape_gradients.begin())
       , shape_hessians(shape_hessians.begin())
     {
       (void)dummy1;
       (void)dummy2;
     }

     // Applies shape_values along 'direction' (apply() with type 0). With
     // add == true the result is added to out, otherwise out is overwritten.
     template <int direction, bool contract_over_rows, bool add>
     void
     values(const Number in[], Number out[]) const
     {
       Assert(shape_values != nullptr, ExcNotInitialized());
       apply<direction, contract_over_rows, add, 0>(shape_values, in, out);
     }

     // Applies shape_gradients along 'direction' (apply() with type 1).
     template <int direction, bool contract_over_rows, bool add>
     void
     gradients(const Number in[], Number out[]) const
     {
       Assert(shape_gradients != nullptr, ExcNotInitialized());
       apply<direction, contract_over_rows, add, 1>(shape_gradients, in, out);
     }

     // Applies shape_hessians along 'direction'. The hessians go through
     // the same type 0 code path as the values, since apply() statically
     // rejects any type other than 0 and 1.
     template <int direction, bool contract_over_rows, bool add>
     void
     hessians(const Number in[], Number out[]) const
     {
       Assert(shape_hessians != nullptr, ExcNotInitialized());
       apply<direction, contract_over_rows, add, 0>(shape_hessians, in, out);
     }

     // Same as values(), but with one_line == true; apply() statically
     // requires direction == dim - 1 in that mode.
     template <int direction, bool contract_over_rows, bool add>
     void
     values_one_line(const Number in[], Number out[]) const
     {
       Assert(shape_values != nullptr, ExcNotInitialized());
       apply<direction, contract_over_rows, add, 0, true>(shape_values, in, out);
     }

     // Same as gradients(), but with one_line == true.
     template <int direction, bool contract_over_rows, bool add>
     void
     gradients_one_line(const Number in[], Number out[]) const
     {
       Assert(shape_gradients != nullptr, ExcNotInitialized());
       apply<direction, contract_over_rows, add, 1, true>(shape_gradients,
                                                          in,
                                                          out);
     }

     // Same as hessians(), but with one_line == true.
     template <int direction, bool contract_over_rows, bool add>
     void
     hessians_one_line(const Number in[], Number out[]) const
     {
       Assert(shape_hessians != nullptr, ExcNotInitialized());
       apply<direction, contract_over_rows, add, 0, true>(shape_hessians,
                                                          in,
                                                          out);
     }

     // Kernel shared by all functions above. It is static and receives the
     // shape array explicitly, so it does not read the pointers stored in
     // this object.
     template <int  direction,
               bool contract_over_rows,
               bool add,
               int  type,
               bool one_line = false>
     static void
     apply(const Number2 *DEAL_II_RESTRICT shape_data,
           const Number *                  in,
           Number *                        out);

     // Non-owning pointers to the shape arrays (nullptr if not provided)
     const Number2 *shape_values;
     const Number2 *shape_gradients;
     const Number2 *shape_hessians;
   };


   // Definition of apply() for the evaluate_symmetric_hierarchical variant.
   //
   // For every 1D line of the tensor-product data along `direction`, the
   // function multiplies the mm input entries of that line with the 1D matrix
   // in `shapes` and writes nn output entries (overwriting them for
   // add == false, accumulating for add == true). The matrix is addressed
   // as shapes[row * n_columns + column].
   //
   // Only part of the matrix is read: partial sums are formed separately and
   // then combined with sums/differences whose signs depend on `type`
   // (0 or 1). NOTE(review): this presumably relies on an even/odd structure
   // of the 1D shape data provided by the caller; that property is not
   // checked here.
   //
   // With one_line == true only one line is processed and `in`/`out` are not
   // advanced; this mode is restricted to direction == dim - 1.
   template <int dim,
             int n_rows,
             int n_columns,
             typename Number,
             typename Number2>
   template <int  direction,
             bool contract_over_rows,
             bool add,
             int  type,
             bool one_line>
   inline void
   EvaluatorTensorProduct<evaluate_symmetric_hierarchical,
                          dim,
                          n_rows,
                          n_columns,
                          Number,
                          Number2>::apply(const Number2 *DEAL_II_RESTRICT shapes,
                                          const Number *                  in,
                                          Number *                        out)
   {
     static_assert(one_line == false || direction == dim - 1,
                   "Single-line evaluation only works for direction=dim-1.");
     static_assert(
       type == 0 || type == 1,
       "Only types 0 and 1 implemented for evaluate_symmetric_hierarchical.");
     // in == out is only accepted for the last direction, for single-line
     // mode, or when input and output lines have the same length.
     Assert(dim == direction + 1 || one_line == true || n_rows == n_columns ||
              in != out,
            ExcMessage("In-place operation only supported for "
                       "n_rows==n_columns or single-line interpolation"));

     // We cannot statically assert that direction is less than dim, so must do
     // an additional dynamic check
     AssertIndexRange(direction, dim);

     // nn: number of output entries per line, mm: number of input entries per
     // line. n_cols and mid are the respective halves (rounded down).
     constexpr int nn     = contract_over_rows ? n_columns : n_rows;
     constexpr int mm     = contract_over_rows ? n_rows : n_columns;
     constexpr int n_cols = nn / 2;
     constexpr int mid    = mm / 2;

     // stride: distance between consecutive entries of one line.
     // n_blocks1: number of lines visited by the inner loop (1 in single-line
     // mode). n_blocks2: number of outer blocks; the exponent is clamped to 0
     // for direction >= dim.
     constexpr int stride    = Utilities::pow(n_columns, direction);
     constexpr int n_blocks1 = one_line ? 1 : stride;
     constexpr int n_blocks2 =
       Utilities::pow(n_rows, (direction >= dim) ? 0 : (dim - direction - 1));

     // this code may look very inefficient at first sight due to the many
     // different cases with if's at the innermost loop part, but all of the
     // conditionals can be evaluated at compile time because they are
     // templates, so the compiler should optimize everything away
     for (int i2 = 0; i2 < n_blocks2; ++i2)
       {
         for (int i1 = 0; i1 < n_blocks1; ++i1)
           {
             if (contract_over_rows)
               {
                 // Copy the mm input entries of this line into a local buffer
                 // before any output entry is written.
                 Number x[mm];
                 for (unsigned int i = 0; i < mm; ++i)
                   x[i] = in[stride * i];
                 // Each iteration produces output col and its mirror
                 // nn - 1 - col from the same two partial sums.
                 for (unsigned int col = 0; col < n_cols; ++col)
                   {
                     // r0 collects the products with even-indexed inputs
                     // (matrix rows 0, 2, ...), r1 those with odd-indexed
                     // inputs (rows 1, 3, ...), all taken from column col.
                     Number r0, r1;
                     if (mid > 0)
                       {
                         r0 = shapes[col] * x[0];
                         r1 = shapes[col + n_columns] * x[1];
                         for (unsigned int ind = 1; ind < mid; ++ind)
                           {
                             r0 +=
                               shapes[col + 2 * ind * n_columns] * x[2 * ind];
                             r1 += shapes[col + (2 * ind + 1) * n_columns] *
                                   x[2 * ind + 1];
                           }
                       }
                     else
                       r0 = r1 = Number();
                     // For odd mm the last input has an even index (mm - 1)
                     // and is not covered by the paired loop above.
                     if (mm % 2 == 1)
                       r0 += shapes[col + (mm - 1) * n_columns] * x[mm - 1];
                     // Output col gets r0 + r1; the mirrored output gets
                     // r0 - r1 for type 0 and r1 - r0 for type 1.
                     if (add == false)
                       {
                         out[stride * col] = r0 + r1;
                         if (type == 1)
                           out[stride * (nn - 1 - col)] = r1 - r0;
                         else
                           out[stride * (nn - 1 - col)] = r0 - r1;
                       }
                     else
                       {
                         out[stride * col] += r0 + r1;
                         if (type == 1)
                           out[stride * (nn - 1 - col)] += r1 - r0;
                         else
                           out[stride * (nn - 1 - col)] += r0 - r1;
                       }
                   }
                 // Middle output (index n_cols) for odd nn: only inputs of
                 // one parity contribute, even-indexed ones for type 0
                 // (shift == 0) and odd-indexed ones for type 1 (shift == 1).
                 if (nn % 2 == 1)
                   {
                     Number             r0;
                     const unsigned int shift = type == 1 ? 1 : 0;
                     if (mid > 0)
                       {
                         r0 = shapes[n_cols + shift * n_columns] * x[shift];
                         for (unsigned int ind = 1; ind < mid; ++ind)
                           r0 += shapes[n_cols + (2 * ind + shift) * n_columns] *
                                 x[2 * ind + shift];
                       }
                     else
                       // NOTE(review): the other branches of this function
                       // initialize with Number() rather than the literal 0.
                       r0 = 0;
                     // The trailing even-indexed input only enters for type 0.
                     if (type != 1 && mm % 2 == 1)
                       r0 += shapes[n_cols + (mm - 1) * n_columns] * x[mm - 1];
                     if (add == false)
                       out[stride * n_cols] = r0;
                     else
                       out[stride * n_cols] += r0;
                   }
               }
             else
               {
                 // Combine input i with its mirror mm - 1 - i: for type 0, xp
                 // holds the sums and xm the differences; for type 1 the
                 // roles are swapped. xm is sized at least 1 so that the
                 // array is never declared with zero length.
                 Number xp[mid + 1], xm[mid > 0 ? mid : 1];
                 for (int i = 0; i < mid; ++i)
                   if (type == 0)
                     {
                       xp[i] = in[stride * i] + in[stride * (mm - 1 - i)];
                       xm[i] = in[stride * i] - in[stride * (mm - 1 - i)];
                     }
                   else
                     {
                       xp[i] = in[stride * i] - in[stride * (mm - 1 - i)];
                       xm[i] = in[stride * i] + in[stride * (mm - 1 - i)];
                     }
                 // For odd mm the unpaired middle input is kept in xp[mid].
                 if (mm % 2 == 1)
                   xp[mid] = in[stride * mid];
                 // Each iteration produces two consecutive outputs: output
                 // 2 * col from matrix row 2 * col applied to xp, and output
                 // 2 * col + 1 from row 2 * col + 1 applied to xm. Only the
                 // first mid columns of each row are read here.
                 for (unsigned int col = 0; col < n_cols; ++col)
                   {
                     Number r0, r1;
                     if (mid > 0)
                       {
                         r0 = shapes[2 * col * n_columns] * xp[0];
                         r1 = shapes[(2 * col + 1) * n_columns] * xm[0];
                         for (unsigned int ind = 1; ind < mid; ++ind)
                           {
                             r0 += shapes[2 * col * n_columns + ind] * xp[ind];
                             r1 +=
                               shapes[(2 * col + 1) * n_columns + ind] * xm[ind];
                           }
                       }
                     else
                       r0 = r1 = Number();
                     // The middle input (matrix column mid) contributes to
                     // the odd output for type 1 and to the even output for
                     // type 0.
                     if (mm % 2 == 1)
                       {
                         if (type == 1)
                           r1 +=
                             shapes[(2 * col + 1) * n_columns + mid] * xp[mid];
                         else
                           r0 += shapes[2 * col * n_columns + mid] * xp[mid];
                       }
                     if (add == false)
                       {
                         out[stride * (2 * col)]     = r0;
                         out[stride * (2 * col + 1)] = r1;
                       }
                     else
                       {
                         out[stride * (2 * col)] += r0;
                         out[stride * (2 * col + 1)] += r1;
                       }
                   }
                 // Last output (index nn - 1) for odd nn: uses matrix row
                 // nn - 1 applied to xp; the middle input is only included
                 // for type 0.
                 if (nn % 2 == 1)
                   {
                     Number r0;
                     if (mid > 0)
                       {
                         r0 = shapes[(nn - 1) * n_columns] * xp[0];
                         for (unsigned int ind = 1; ind < mid; ++ind)
                           r0 += shapes[(nn - 1) * n_columns + ind] * xp[ind];
                       }
                     else
                       r0 = Number();
                     if (mm % 2 == 1 && type == 0)
                       r0 += shapes[(nn - 1) * n_columns + mid] * xp[mid];
                     if (add == false)
                       out[stride * (nn - 1)] = r0;
                     else
                       out[stride * (nn - 1)] += r0;
                   }
               }
             // Move to the start of the next line within the current block.
             if (one_line == false)
               {
                 in += 1;
                 out += 1;
               }
           }
         // The inner loop advanced both pointers by stride entries; skip the
         // remaining stride * (mm - 1) input and stride * (nn - 1) output
         // entries to reach the next block.
         if (one_line == false)
           {
             in += stride * (mm - 1);
             out += stride * (nn - 1);
           }
       }
   }

 } // end of namespace internal


 DEAL_II_NAMESPACE_CLOSE

 #endif
numbers::invalid_unsigned_int
static const unsigned int invalid_unsigned_int
Definition: types.h:173

AssertDimension
#define AssertDimension(dim1, dim2)
Definition: exceptions.h:1366

AlignedVector
Definition: aligned_vector.h:61

Utilities::pow
constexpr unsigned int pow(const unsigned int base, const unsigned int iexp)
Definition: utilities.h:353

internal::evaluate_symmetric
Definition: tensor_product_kernels.h:48

internal::EvaluatorTensorProduct< evaluate_evenodd, dim, n_rows, n_columns, Number, Number2 >::EvaluatorTensorProduct
EvaluatorTensorProduct(const AlignedVector< Number2 > &shape_values, const AlignedVector< Number2 > &shape_gradients, const AlignedVector< Number2 > &shape_hessians, const unsigned int dummy1=0, const unsigned int dummy2=0)
Definition: tensor_product_kernels.h:1494

AssertIndexRange
#define AssertIndexRange(index, range)
Definition: exceptions.h:1407

StandardExceptions::ExcNotInitialized
static ::ExceptionBase & ExcNotInitialized()

AlignedVector::size
size_type size() const

Utilities::fixed_power
T fixed_power(const T t)
Definition: utilities.h:912

internal::evaluate_symmetric_hierarchical
Definition: tensor_product_kernels.h:65

internal::EvaluatorTensorProduct< evaluate_symmetric_hierarchical, dim, n_rows, n_columns, Number, Number2 >::EvaluatorTensorProduct
EvaluatorTensorProduct(const AlignedVector< Number > &shape_values)
Definition: tensor_product_kernels.h:1871

StandardExceptions::ExcMessage
static ::ExceptionBase & ExcMessage(std::string arg1)

internal::EvaluatorTensorProduct< evaluate_symmetric_hierarchical, dim, n_rows, n_columns, Number, Number2 >::EvaluatorTensorProduct
EvaluatorTensorProduct(const AlignedVector< Number2 > &shape_values, const AlignedVector< Number2 > &shape_gradients, const AlignedVector< Number2 > &shape_hessians, const unsigned int dummy1=0, const unsigned int dummy2=0)
Definition: tensor_product_kernels.h:1881

Assert
#define Assert(cond, exc)
Definition: exceptions.h:1227

StandardExceptions::ExcDimensionMismatch
static ::ExceptionBase & ExcDimensionMismatch(std::size_t arg1, std::size_t arg2)

internal::EvaluatorTensorProduct< evaluate_symmetric_hierarchical, dim, n_rows, n_columns, Number, Number2 >::EvaluatorTensorProduct
EvaluatorTensorProduct()
Definition: tensor_product_kernels.h:1861

internal::EvaluatorTensorProduct< evaluate_general, dim, n_rows, n_columns, Number, Number2 >::EvaluatorTensorProduct
EvaluatorTensorProduct()
Definition: tensor_product_kernels.h:139

internal::EvaluatorTensorProduct< evaluate_general, dim, 0, 0, Number, Number2 >::EvaluatorTensorProduct
EvaluatorTensorProduct(const AlignedVector< Number2 > &shape_values, const AlignedVector< Number2 > &shape_gradients, const AlignedVector< Number2 > &shape_hessians, const unsigned int n_rows, const unsigned int n_columns)
Definition: tensor_product_kernels.h:551

internal::EvaluatorTensorProduct< evaluate_general, dim, n_rows, n_columns, Number, Number2 >::EvaluatorTensorProduct
EvaluatorTensorProduct(const AlignedVector< Number2 > &shape_values, const AlignedVector< Number2 > &shape_gradients, const AlignedVector< Number2 > &shape_hessians, const unsigned int dummy1=0, const unsigned int dummy2=0)
Definition: tensor_product_kernels.h:148

internal::EvaluatorVariant
EvaluatorVariant
Definition: tensor_product_kernels.h:36

internal
Definition: aligned_vector.h:345

internal::evaluate_general
Definition: tensor_product_kernels.h:42

internal::EvaluatorTensorProduct< evaluate_evenodd, dim, n_rows, n_columns, Number, Number2 >::EvaluatorTensorProduct
EvaluatorTensorProduct(const AlignedVector< Number2 > &shape_values)
Definition: tensor_product_kernels.h:1482

internal::EvaluatorTensorProduct< evaluate_symmetric, dim, n_rows, n_columns, Number, Number2 >::EvaluatorTensorProduct
EvaluatorTensorProduct(const AlignedVector< Number2 > &shape_values, const AlignedVector< Number2 > &shape_gradients, const AlignedVector< Number2 > &shape_hessians, const unsigned int dummy1=0, const unsigned int dummy2=0)
Definition: tensor_product_kernels.h:890

internal::EvaluatorTensorProduct< evaluate_general, dim, 0, 0, Number, Number2 >::EvaluatorTensorProduct
EvaluatorTensorProduct()
Definition: tensor_product_kernels.h:540

numbers
Definition: numbers.h:113

internal::EvaluatorTensorProduct< evaluate_evenodd, dim, n_rows, n_columns, Number, Number2 >::EvaluatorTensorProduct
EvaluatorTensorProduct()
Definition: tensor_product_kernels.h:1472

StandardExceptions::ExcNotImplemented
static ::ExceptionBase & ExcNotImplemented()

internal::EvaluatorTensorProduct
Definition: tensor_product_kernels.h:96

AlignedVector::empty
bool empty() const

internal::evaluate_evenodd
Definition: tensor_product_kernels.h:54