diff --git a/cpp/basix/dof-transformations.cpp b/cpp/basix/dof-transformations.cpp index f842ac5ca..42793a841 100644 --- a/cpp/basix/dof-transformations.cpp +++ b/cpp/basix/dof-transformations.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2020 Chris Richardson & Matthew Scroggs +// Copyright (c) 2020-2024 Chris Richardson, Matthew Scroggs and Garth N. Wells // FEniCS Project // SPDX-License-Identifier: MIT @@ -15,6 +15,8 @@ using namespace basix; +namespace +{ namespace stdex = MDSPAN_IMPL_STANDARD_NAMESPACE::MDSPAN_IMPL_PROPOSED_NAMESPACE; template @@ -32,18 +34,13 @@ using map_data_t template using mapinfo_t = std::map>>; -namespace -{ //----------------------------------------------------------------------------- int find_first_subentity(cell::type cell_type, cell::type entity_type) { const int edim = cell::topological_dimension(entity_type); std::vector entities = cell::subentity_types(cell_type)[edim]; - if (auto it = std::ranges::find(entities, entity_type); - it != entities.end()) - { + if (auto it = std::ranges::find(entities, entity_type); it != entities.end()) return std::distance(entities.begin(), it); - } else throw std::runtime_error("Entity not found"); } @@ -93,13 +90,11 @@ mapinfo_t get_mapinfo(cell::type cell_type) { mapinfo_t mapinfo; auto& data = mapinfo.try_emplace(cell::type::interval).first->second; - auto map = [](auto pt) -> std::array { - return {pt[1], pt[0], 0.0}; - }; + auto map = [](auto pt) -> std::array { return {pt[1], pt[0], 0}; }; stdex::mdarray> J( stdex::extents{}, {0., 1., 1., 0.}); - T detJ = -1.0; + T detJ = -1; stdex::mdarray> K( stdex::extents{}, {0., 1., 1., 0.}); data.push_back(std::tuple(map, J, detJ, K)); @@ -109,9 +104,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) { mapinfo_t mapinfo; auto& data = mapinfo.try_emplace(cell::type::interval).first->second; - auto map = [](auto pt) -> std::array { - return {1 - pt[0], pt[1], 0}; - }; + auto map + = [](auto pt) -> std::array { return {1 - pt[0], pt[1], 0}; }; stdex::mdarray> J( stdex::extents{}, {-1., 0., 0., 1.}); @@ -126,9 +120,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) mapinfo_t mapinfo; { auto& data = mapinfo.try_emplace(cell::type::interval).first->second; - auto map = [](auto pt) -> std::array { - return {pt[0], pt[2], pt[1]}; - }; + auto map + = [](auto pt) -> std::array { return {pt[0], pt[2], pt[1]}; }; stdex::mdarray> J( stdex::extents{}, {1., 0., 0., 0., 0., 1., 0., 1., 0.}); @@ -143,9 +136,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) { auto& data = mapinfo.try_emplace(cell::type::triangle).first->second; { - auto map = [](auto pt) -> std::array { - return {pt[2], pt[0], pt[1]}; - }; + auto map + = [](auto pt) -> std::array { return {pt[2], pt[0], pt[1]}; }; stdex::mdarray> J( stdex::extents{}, {0., 1., 0., 0., 0., 1., 1., 0., 0.}); @@ -157,9 +149,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) data.push_back(std::tuple(map, J, detJ, K)); } { - auto map = [](auto pt) -> std::array { - return {pt[0], pt[2], pt[1]}; - }; + auto map + = [](auto pt) -> std::array { return {pt[0], pt[2], pt[1]}; }; stdex::mdarray> J( stdex::extents{}, {1., 0., 0., 0., 0., 1., 0., 1., 0.}); @@ -179,9 +170,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) mapinfo_t mapinfo; { auto& data = mapinfo.try_emplace(cell::type::interval).first->second; - auto map = [](auto pt) -> std::array { - return {1 - pt[0], pt[1], pt[2]}; - }; + auto map = [](auto pt) -> std::array + { return {1 - pt[0], pt[1], pt[2]}; }; stdex::mdarray> J( stdex::extents{}, {-1., 0., 0., 0., 1., 0., 0., 0., 1.}); @@ -196,9 +186,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) { auto& data = mapinfo.try_emplace(cell::type::quadrilateral).first->second; { - auto map = [](auto pt) -> std::array { - return {1 - pt[1], pt[0], pt[2]}; - }; + auto map = [](auto pt) -> std::array + { return {1 - pt[1], pt[0], pt[2]}; }; stdex::mdarray> J( stdex::extents{}, {0., 1., 0., -1., 0., 0., 0., 0., 1.}); @@ -210,9 +199,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) data.push_back(std::tuple(map, J, detJ, K)); } { - auto map = [](auto pt) -> std::array { - return {pt[1], pt[0], pt[2]}; - }; + auto map + = [](auto pt) -> std::array { return {pt[1], pt[0], pt[2]}; }; stdex::mdarray> J( stdex::extents{}, {0., 1., 0., 1., 0., 0., 0., 0., 1.}); @@ -231,9 +219,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) mapinfo_t mapinfo; { auto& data = mapinfo.try_emplace(cell::type::interval).first->second; - auto map = [](auto pt) -> std::array { - return {1 - pt[0], pt[1], pt[2]}; - }; + auto map = [](auto pt) -> std::array + { return {1 - pt[0], pt[1], pt[2]}; }; stdex::mdarray> J( stdex::extents{}, {-1., 0., 0., 0., 1., 0., 0., 0., 1.}); @@ -246,9 +233,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) { auto& data = mapinfo.try_emplace(cell::type::triangle).first->second; { - auto map = [](auto pt) -> std::array { - return {1 - pt[1] - pt[0], pt[0], pt[2]}; - }; + auto map = [](auto pt) -> std::array + { return {1 - pt[1] - pt[0], pt[0], pt[2]}; }; stdex::mdarray> J( stdex::extents{}, {0., 1., 0., -1., -1., 0., 0., 0., 1.}); @@ -259,9 +245,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) data.push_back(std::tuple(map, J, detJ, K)); } { - auto map = [](auto pt) -> std::array { - return {pt[1], pt[0], pt[2]}; - }; + auto map + = [](auto pt) -> std::array { return {pt[1], pt[0], pt[2]}; }; stdex::mdarray> J( stdex::extents{}, {0., 1., 0., 1., 0., 0., 0., 0., 1.}); @@ -275,9 +260,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) { auto& data = mapinfo.try_emplace(cell::type::quadrilateral).first->second; { - auto map = [](auto pt) -> std::array { - return {1 - pt[2], pt[1], pt[0]}; - }; + auto map = [](auto pt) -> std::array + { return {1 - pt[2], pt[1], pt[0]}; }; stdex::mdarray> J( stdex::extents{}, {0., 0., 1., 0., 1., 0., -1., 0., 0.}); @@ -288,9 +272,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) data.push_back(std::tuple(map, J, detJ, K)); } { // scope - auto map = [](auto pt) -> std::array { - return {pt[2], pt[1], pt[0]}; - }; + auto map + = [](auto pt) -> std::array { return {pt[2], pt[1], pt[0]}; }; stdex::mdarray> J( stdex::extents{}, {0., 0., 1., 0., 1., 0., 1., 0., 0.}); @@ -309,9 +292,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) mapinfo_t mapinfo; { auto& data = mapinfo.try_emplace(cell::type::interval).first->second; - auto map = [](auto pt) -> std::array { - return {1 - pt[0], pt[1], pt[2]}; - }; + auto map = [](auto pt) -> std::array + { return {1 - pt[0], pt[1], pt[2]}; }; stdex::mdarray> J( stdex::extents{}, {-1., 0., 0., 0., 1., 0., 0., 0., 1.}); @@ -325,9 +307,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) { auto& data = mapinfo.try_emplace(cell::type::quadrilateral).first->second; { - auto map = [](auto pt) -> std::array { - return {1 - pt[1], pt[0], pt[2]}; - }; + auto map = [](auto pt) -> std::array + { return {1 - pt[1], pt[0], pt[2]}; }; stdex::mdarray> J( stdex::extents{}, {0., 1., 0., -1., 0., 0., 0., 0., 1.}); @@ -338,9 +319,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) data.push_back(std::tuple(map, J, detJ, K)); } { - auto map = [](auto pt) -> std::array { - return {pt[1], pt[0], pt[2]}; - }; + auto map + = [](auto pt) -> std::array { return {pt[1], pt[0], pt[2]}; }; stdex::mdarray> J( stdex::extents{}, {0., 1., 0., 1., 0., 0., 0., 0., 1.}); @@ -355,9 +335,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) { auto& data = mapinfo.try_emplace(cell::type::triangle).first->second; { - auto map = [](auto pt) -> std::array { - return {1 - pt[2] - pt[0], pt[1], pt[0]}; - }; + auto map = [](auto pt) -> std::array + { return {1 - pt[2] - pt[0], pt[1], pt[0]}; }; stdex::mdarray> J( stdex::extents{}, {0., 0., 1., 0., 1., 0., -1., 0., -1.}); @@ -368,9 +347,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) data.push_back(std::tuple(map, J, detJ, K)); } { - auto map = [](auto pt) -> std::array { - return {pt[2], pt[1], pt[0]}; - }; + auto map + = [](auto pt) -> std::array { return {pt[2], pt[1], pt[0]}; }; stdex::mdarray> J( stdex::extents{}, {0., 0., 1., 0., 1., 0., 1., 0., 0.}); @@ -391,13 +369,12 @@ mapinfo_t get_mapinfo(cell::type cell_type) //----------------------------------------------------------------------------- template std::pair, std::array> compute_transformation( - cell::type cell_type, - const std::array>, 4>& x, - const std::array>, 4>& M, + cell::type cell_type, std::array>, 4> x, + std::array>, 4> M, mdspan_t coeffs, const mdarray_t& J, T detJ, const mdarray_t& K, - const std::function(std::span)> map_point, - int degree, int tdim, int entity, std::size_t vs, const maps::type map_type, + std::function(std::span)> map_point, int degree, + int tdim, int entity, std::size_t vs, const maps::type map_type, const polyset::type ptype) { if (x[tdim].size() == 0 or x[tdim][entity].extent(0) == 0) @@ -426,7 +403,7 @@ std::pair, std::array> compute_transformation( mdarray_t mapped_pts(pts.extents()); for (std::size_t p = 0; p < mapped_pts.extent(0); ++p) { - auto mp = map_point( + std::array mp = map_point( std::span(pts.data_handle() + p * pts.extent(1), pts.extent(1))); for (std::size_t k = 0; k < mapped_pts.extent(1); ++k) mapped_pts(p, k) = mp[k]; @@ -439,18 +416,27 @@ std::pair, std::array> compute_transformation( mdspan_t polyset_vals(polyset_vals_b.data(), polyset_shape[1], polyset_shape[2]); - mdarray_t tabulated_data(npts, total_ndofs, vs); + std::vector tabulated_data_b(npts * total_ndofs * vs); + mdspan_t tabulated_data(tabulated_data_b.data(), npts, total_ndofs, vs); + std::vector result_b(polyset_vals.extent(1) * coeffs.extent(0)); + mdspan_t result(result_b.data(), coeffs.extent(0), + polyset_vals.extent(1)); + std::vector coeffs_b(coeffs.extent(0) * polyset_vals.extent(0)); + mdspan_t _coeffs(coeffs_b.data(), coeffs.extent(0), + polyset_vals.extent(0)); for (std::size_t j = 0; j < vs; ++j) { - mdarray_t result(polyset_vals.extent(1), coeffs.extent(0)); for (std::size_t k0 = 0; k0 < coeffs.extent(0); ++k0) - for (std::size_t k1 = 0; k1 < polyset_vals.extent(1); ++k1) - for (std::size_t k2 = 0; k2 < polyset_vals.extent(0); ++k2) - result(k1, k0) += coeffs(k0, k2 + psize * j) * polyset_vals(k2, k1); - - for (std::size_t k0 = 0; k0 < result.extent(0); ++k0) - for (std::size_t k1 = 0; k1 < result.extent(1); ++k1) - tabulated_data(k0, k1, j) = result(k0, k1); + for (std::size_t k2 = 0; k2 < polyset_vals.extent(0); ++k2) + _coeffs(k0, k2) = coeffs(k0, k2 + psize * j); + + // r^t: coeffs.extent(0) x polyset_vals.extent(1) [k0, k1] + // c: coeffs.extent(1) x polyset_vals.extent(0) [k0, k2] + // p: polyset_vals.extent(0) x polyset_vals.extent(1) [k2, k1] + math::dot(_coeffs, polyset_vals, result); + for (std::size_t k0 = 0; k0 < result.extent(1); ++k0) + for (std::size_t k1 = 0; k1 < result.extent(0); ++k1) + tabulated_data(k0, k1, j) = result(k1, k0); } // push forward @@ -460,14 +446,12 @@ std::pair, std::array> compute_transformation( for (std::size_t i = 0; i < npts; ++i) { mdspan_t tab( - tabulated_data.data() + tabulated_data_b.data() + i * tabulated_data.extent(1) * tabulated_data.extent(2), tabulated_data.extent(1), tabulated_data.extent(2)); - push_forward(map_type, mdspan_t(temp_data.data(), temp_data.extents()), tab, J, detJ, K); - for (std::size_t k0 = 0; k0 < temp_data.extent(0); ++k0) for (std::size_t k1 = 0; k1 < temp_data.extent(1); ++k1) pushed_data(i, k0, k1) = temp_data(k0, k1); @@ -477,18 +461,42 @@ std::pair, std::array> compute_transformation( // Interpolate to calculate coefficients std::vector transformb(ndofs * ndofs); mdspan_t transform(transformb.data(), ndofs, ndofs); - for (std::size_t d = 0; d < imat.extent(3); ++d) + + std::vector imat_b(transform.extent(1) * imat.extent(2)); + mdspan_t imat_id(imat_b.data(), transform.extent(1), imat.extent(2)); + + std::vector pushed_datai_b(imat.extent(2) * transform.extent(0)); + mdspan_t pushed_data_i(pushed_datai_b.data(), imat.extent(2), + transform.extent(0)); + + std::vector transformT_b(transform.extent(1) * transform.extent(0)); + mdspan_t transformT(transformT_b.data(), transform.extent(1), + transform.extent(0)); + for (std::size_t i = 0; i < vs; ++i) { - for (std::size_t i = 0; i < vs; ++i) + // Pack pushed_data + for (std::size_t k2 = 0; k2 < imat.extent(2); ++k2) + for (std::size_t k1 = 0; k1 < transform.extent(0); ++k1) + pushed_data_i(k2, k1) = pushed_data(k2, k1 + dofstart, i); + + for (std::size_t d = 0; d < imat.extent(3); ++d) { + // Pack imat for (std::size_t k0 = 0; k0 < transform.extent(1); ++k0) - for (std::size_t k1 = 0; k1 < transform.extent(0); ++k1) - for (std::size_t k2 = 0; k2 < imat.extent(2); ++k2) - transform(k1, k0) - += imat(k0, i, k2, d) * pushed_data(k2, k1 + dofstart, i); + for (std::size_t k2 = 0; k2 < imat.extent(2); ++k2) + imat_id(k0, k2) = imat(k0, i, k2, d); + + // transformT_(k0, k1) = imat_id_(k0, k2) pushed_data_i(k2, k1) + + // transformT_(k0, k1) + math::dot(imat_id, pushed_data_i, transformT, 1, 1); } } + // Transpose 'transformT' -> 'transform' + for (std::size_t k0 = 0; k0 < transform.extent(1); ++k0) + for (std::size_t k1 = 0; k1 < transform.extent(0); ++k1) + transform(k1, k0) = transformT(k0, k1); + return {std::move(transformb), {transform.extent(0), transform.extent(1)}}; } } // namespace @@ -497,14 +505,16 @@ template std::map, std::array>> doftransforms::compute_entity_transformations( cell::type cell_type, - const std::array< + std::array< std::vector>>, - 4>& x, - const std::array< + 4> + x, + std::array< std::vector>>, - 4>& M, + 4> + M, MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< const T, MDSPAN_IMPL_STANDARD_NAMESPACE::dextents> coeffs, @@ -518,7 +528,6 @@ doftransforms::compute_entity_transformations( const int tdim = cell::topological_dimension(entity_type); const int entity = find_first_subentity(cell_type, entity_type); std::size_t ndofs = M[tdim].size() == 0 ? 0 : M[tdim][entity].extent(0); - std::vector transform; transform.reserve(emap_data.size() * ndofs * ndofs); for (auto& [mapfn, J, detJ, K] : emap_data) @@ -542,14 +551,14 @@ template std::map, std::array>> doftransforms::compute_entity_transformations( cell::type, - const std::array>>, - 4>&, - const std::array>>, - 4>&, + std::array>>, + 4>, + std::array>>, + 4>, MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< const float, MDSPAN_IMPL_STANDARD_NAMESPACE::dextents>, int, std::size_t, maps::type, polyset::type); @@ -558,14 +567,14 @@ template std::map, std::array>> doftransforms::compute_entity_transformations( cell::type, - const std::array>>, - 4>&, - const std::array>>, - 4>&, + std::array>>, + 4>, + std::array>>, + 4>, MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< const double, MDSPAN_IMPL_STANDARD_NAMESPACE::dextents>, int, std::size_t, maps::type, polyset::type); diff --git a/cpp/basix/dof-transformations.h b/cpp/basix/dof-transformations.h index 3399c7397..596194606 100644 --- a/cpp/basix/dof-transformations.h +++ b/cpp/basix/dof-transformations.h @@ -40,14 +40,16 @@ template std::map, std::array>> compute_entity_transformations( cell::type cell_type, - const std::array< + std::array< std::vector>>, - 4>& x, - const std::array< + 4> + x, + std::array< std::vector>>, - 4>& M, + 4> + M, MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< const T, MDSPAN_IMPL_STANDARD_NAMESPACE::dextents> coeffs, diff --git a/cpp/basix/finite-element.cpp b/cpp/basix/finite-element.cpp index e97b804c7..411a61fd2 100644 --- a/cpp/basix/finite-element.cpp +++ b/cpp/basix/finite-element.cpp @@ -23,6 +23,7 @@ #include #include #include + #define str_macro(X) #X #define str(X) str_macro(X) @@ -965,7 +966,7 @@ FiniteElement::FiniteElement( rtot += r; } - constexpr F eps = 10.0 * std::numeric_limits::epsilon(); + const F eps = 10 * degree * degree * std::numeric_limits::epsilon(); if ((trans.extent(2) != 1 and std::abs(rmin) > eps) or std::abs(rmax - 1.0) > eps or std::abs(rtot - 1.0) > eps) { @@ -1436,7 +1437,7 @@ FiniteElement::FiniteElement( for (std::size_t col = 0; col < matM.extent(1); ++col) { F v = col == row ? 1.0 : 0.0; - constexpr F eps = 100 * std::numeric_limits::epsilon(); + const F eps = 10 * degree * degree * std::numeric_limits::epsilon(); if (std::abs(matM(row, col) - v) > eps) { _interpolation_is_identity = false; diff --git a/cpp/basix/finite-element.h b/cpp/basix/finite-element.h index e65ad8769..9fe29e9e4 100644 --- a/cpp/basix/finite-element.h +++ b/cpp/basix/finite-element.h @@ -26,7 +26,6 @@ /// Basix: FEniCS runtime basis evaluation library namespace basix { - namespace impl { template diff --git a/cpp/basix/math.h b/cpp/basix/math.h index 82cb86ff9..ac6fff692 100644 --- a/cpp/basix/math.h +++ b/cpp/basix/math.h @@ -1,4 +1,4 @@ -// Copyright (C) 2021 Igor Baratta +// Copyright (C) 2021-2024 Igor Baratta and Garth N. Wells // // This file is part of DOLFINx (https://www.fenicsproject.org) // @@ -6,6 +6,8 @@ #pragma once +#include "mdspan.hpp" +#include #include #include #include @@ -15,8 +17,6 @@ #include #include -#include "mdspan.hpp" - extern "C" { void ssyevd_(char* jobz, char* uplo, int* n, float* a, int* lda, float* w, @@ -44,20 +44,21 @@ extern "C" /// @brief Mathematical functions. /// -/// @note The functions in this namespace are designed to be called -/// multiple times at runtime, so their performance is critical. +/// @note Functions in this namespace are designed to be called multiple +/// times at runtime, so their performance is critical. namespace basix::math { namespace impl { -/// @brief Compute C = A * B using BLAS. -/// @param[in] A Input matrix -/// @param[in] B Input matrix -/// @return A * B +/// @brief Compute C = alpha A * B + \beta C using BLAS (GEMM). +/// @param[in] A Input matrix. +/// @param[in] B Input matrix. +/// @param[in] alpha +/// @param[in] beta template void dot_blas(std::span A, std::array Ashape, std::span B, std::array Bshape, - std::span C) + std::span C, T alpha = 1, T beta = 0) { static_assert(std::is_same_v or std::is_same_v); @@ -68,8 +69,6 @@ void dot_blas(std::span A, std::array Ashape, int N = Bshape[1]; int K = Ashape[1]; - T alpha = 1; - T beta = 0; int lda = K; int ldb = N; int ldc = N; @@ -89,8 +88,8 @@ void dot_blas(std::span A, std::array Ashape, } // namespace impl /// @brief Compute the outer product of vectors u and v. -/// @param u The first vector -/// @param v The second vector +/// @param u The first vector. +/// @param v The second vector. /// @return The outer product. The type will be the same as `u`. template std::pair, std::array> @@ -103,7 +102,7 @@ outer(const U& u, const V& v) return {std::move(result), {u.size(), v.size()}}; } -/// Compute the cross product u x v +/// @brief Compute the cross product u x v. /// @param u The first vector. It must has size 3. /// @param v The second vector. It must has size 3. /// @return The cross product `u x v`. The type will be the same as `u`. @@ -116,12 +115,13 @@ std::array cross(const U& u, const V& v) u[0] * v[1] - u[1] * v[0]}; } -/// Compute the eigenvalues and eigenvectors of a square Hermitian matrix A -/// @param[in] A Input matrix, row-major storage -/// @param[in] n Number of rows +/// @brief Compute the eigenvalues and eigenvectors of a square +/// Hermitian matrix A. +/// @param[in] A Input matrix, row-major storage. +/// @param[in] n Number of rows. /// @return Eigenvalues (0) and eigenvectors (1). The eigenvector array /// uses column-major storage, which each column being an eigenvector. -/// @pre The matrix `A` must be symmetric +/// @pre The matrix `A` must be symmetric. template std::pair, std::vector> eigh(std::span A, std::size_t n) @@ -179,9 +179,9 @@ std::pair, std::vector> eigh(std::span A, } /// @brief Solve A X = B. -/// @param[in] A The matrix -/// @param[in] B Right-hand side matrix/vector -/// @return A^{-1} B +/// @param[in] A The matrix. +/// @param[in] B Right-hand side matrix/vector.@brief +/// @return A^{-1} B. template std::vector solve(MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< @@ -209,6 +209,7 @@ solve(MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< int nrhs = _B.extent(1); int lda = _A.extent(0); int ldb = _B.extent(0); + // Pivot indices that define the permutation matrix for the LU solver std::vector piv(N); int info; @@ -231,9 +232,9 @@ solve(MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< return rb; } -/// @brief Check if A is a singular matrix, -/// @param[in] A The matrix -/// @return A bool indicating if the matrix is singular +/// @brief Check if A is a singular matrix. +/// @param[in] A The matrix. +/// @return A bool indicating if the matrix is singular. template bool is_singular( MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< @@ -277,9 +278,9 @@ bool is_singular( /// @brief Compute the LU decomposition of the transpose of a square /// matrix A. -/// @param[in,out] A The matrix +/// @param[in,out] A The matrix. /// @return The LU permutation, in prepared format (see -/// precompute::prepare_permutation) +/// precompute::prepare_permutation). template std::vector transpose_lu(std::pair, std::array>& A) @@ -309,38 +310,58 @@ transpose_lu(std::pair, std::array>& A) return perm; } -/// @brief Compute C = A * B. +/// @brief Compute C = \alpha A * B + \beta C /// @param[in] A Input matrix /// @param[in] B Input matrix /// @param[out] C Output matrix. Must be sized correctly before calling /// this function. +/// @param[in] alpha +/// @param[in] beta template -void dot(const U& A, const V& B, W&& C) +void dot(const U& A, const V& B, W&& C, + typename std::decay_t::value_type alpha = 1, + typename std::decay_t::value_type beta = 0) { + using T = typename std::decay_t::value_type; + assert(A.extent(1) == B.extent(0)); assert(C.extent(0) == A.extent(0)); assert(C.extent(1) == B.extent(1)); - if (A.extent(0) * B.extent(1) * A.extent(1) < 512) + if (A.extent(0) * B.extent(1) * A.extent(1) < 256) { - std::fill_n(C.data_handle(), C.extent(0) * C.extent(1), 0); for (std::size_t i = 0; i < A.extent(0); ++i) + { for (std::size_t j = 0; j < B.extent(1); ++j) + { + T C0 = C(i, j); + C(i, j) = 0; + T& _C = C(i, j); for (std::size_t k = 0; k < A.extent(1); ++k) - C(i, j) += A(i, k) * B(k, j); + _C += A(i, k) * B(k, j); + _C = alpha * _C + beta * C0; + } + } } else { - using T = typename std::decay_t::value_type; + static_assert(std::is_same_v::layout_type, + MDSPAN_IMPL_STANDARD_NAMESPACE::layout_right>); + static_assert(std::is_same_v::layout_type, + MDSPAN_IMPL_STANDARD_NAMESPACE::layout_right>); + static_assert(std::is_same_v::layout_type, + MDSPAN_IMPL_STANDARD_NAMESPACE::layout_right>); + static_assert(std::is_same_v::value_type, T>); + static_assert(std::is_same_v::value_type, T>); impl::dot_blas( std::span(A.data_handle(), A.size()), {A.extent(0), A.extent(1)}, std::span(B.data_handle(), B.size()), {B.extent(0), B.extent(1)}, - std::span(C.data_handle(), C.size())); + std::span(C.data_handle(), C.size()), alpha, beta); } } /// @brief Build an identity matrix. -/// @param[in] n The number of rows/columns -/// @return Identity matrix using row-major storage +/// @param[in] n The number of rows/columns. +/// @return Identity matrix using row-major storage. template std::vector eye(std::size_t n) { @@ -356,7 +377,7 @@ std::vector eye(std::size_t n) } /// @brief Orthogonalise the rows of a matrix (in place). -/// @param[in] wcoeffs The matrix +/// @param[in] wcoeffs The matrix. /// @param[in] start The row to start from. The rows before this should /// already be orthogonal. template @@ -375,8 +396,8 @@ void orthogonalise( norm = std::sqrt(norm); if (norm < 2 * std::numeric_limits::epsilon()) { - throw std::runtime_error( - "Cannot orthogonalise the rows of a matrix with incomplete row rank"); + throw std::runtime_error("Cannot orthogonalise the rows of a matrix " + "with incomplete row rank"); } for (std::size_t k = 0; k < wcoeffs.extent(1); ++k) diff --git a/test/test_lagrange.py b/test/test_lagrange.py index c9db0e336..4c7a2caf8 100644 --- a/test/test_lagrange.py +++ b/test/test_lagrange.py @@ -856,7 +856,7 @@ def test_tet(degree): (basix.CellType.tetrahedron, basix.CellType.tetrahedron), ], ) -@pytest.mark.parametrize("degree", [1, 2, 3, 4]) +@pytest.mark.parametrize("degree", [1, 2, 3, 4, 12]) def test_lagrange(celltype, degree): lagrange = basix.create_element( basix.ElementFamily.P, celltype[1], degree, basix.LagrangeVariant.equispaced @@ -866,7 +866,7 @@ def test_lagrange(celltype, degree): assert np.isclose(np.sum(w, axis=1), 1.0).all() -@pytest.mark.parametrize("degree", [1, 2, 3, 4]) +@pytest.mark.parametrize("degree", [1, 2, 3, 4, 12]) def test_dof_transformations_interval(degree): lagrange = basix.create_element( basix.ElementFamily.P, basix.CellType.interval, degree, basix.LagrangeVariant.equispaced @@ -949,6 +949,31 @@ def test_dof_transformations_tetrahedron(degree): assert np.allclose(t, actual) +@pytest.mark.parametrize( + "celltype", + [ + basix.CellType.interval, + basix.CellType.triangle, + basix.CellType.tetrahedron, + basix.CellType.quadrilateral, + basix.CellType.hexahedron, + ], +) +@pytest.mark.parametrize( + "dtype", + [ + np.float32, + np.float64, + ], +) +def test_lagrange_transform(celltype, dtype): + for p in range(1, 12): + e = basix.create_element( + basix.ElementFamily.P, celltype, p, basix.LagrangeVariant.gll_warped, dtype=dtype + ) + assert e.dof_transformations_are_permutations + + @pytest.mark.parametrize("degree", [1, 2, 3, 4]) @pytest.mark.parametrize( "celltype",