From 6bb7f06ec14981b2acfdfcbcdf1e19b393b938d1 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Fri, 15 Nov 2024 18:40:43 +0000 Subject: [PATCH 01/13] Fix tolerances and add test --- cpp/basix/finite-element.cpp | 2 +- cpp/basix/finite-element.h | 1 - test/test_lagrange.py | 13 +++++++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/cpp/basix/finite-element.cpp b/cpp/basix/finite-element.cpp index e97b804c7..7b8e92ca0 100644 --- a/cpp/basix/finite-element.cpp +++ b/cpp/basix/finite-element.cpp @@ -965,7 +965,7 @@ FiniteElement::FiniteElement( rtot += r; } - constexpr F eps = 10.0 * std::numeric_limits::epsilon(); + const F eps = 10 * degree * degree * std::numeric_limits::epsilon(); if ((trans.extent(2) != 1 and std::abs(rmin) > eps) or std::abs(rmax - 1.0) > eps or std::abs(rtot - 1.0) > eps) { diff --git a/cpp/basix/finite-element.h b/cpp/basix/finite-element.h index e65ad8769..9fe29e9e4 100644 --- a/cpp/basix/finite-element.h +++ b/cpp/basix/finite-element.h @@ -26,7 +26,6 @@ /// Basix: FEniCS runtime basis evaluation library namespace basix { - namespace impl { template diff --git a/test/test_lagrange.py b/test/test_lagrange.py index c9db0e336..0d5da1ac2 100644 --- a/test/test_lagrange.py +++ b/test/test_lagrange.py @@ -949,6 +949,19 @@ def test_dof_transformations_tetrahedron(degree): assert np.allclose(t, actual) +@pytest.mark.parametrize( + "celltype", + [basix.CellType.interval, basix.CellType.triangle, basix.CellType.tetrahedron], +) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_lagrange_transform(celltype, dtype): + for p in range(1, 15): + e = basix.create_element( + basix.ElementFamily.P, celltype, p, basix.LagrangeVariant.gll_warped, dtype=dtype + ) + assert e.dof_transformations_are_permutations + + @pytest.mark.parametrize("degree", [1, 2, 3, 4]) @pytest.mark.parametrize( "celltype", From 1868dd3c4e4266bc1b017b814f2837de4d07e863 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Fri, 15 Nov 2024 18:46:31 +0000 Subject: [PATCH 02/13] Make eps consistent --- cpp/basix/finite-element.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/basix/finite-element.cpp b/cpp/basix/finite-element.cpp index 7b8e92ca0..f2da5a08e 100644 --- a/cpp/basix/finite-element.cpp +++ b/cpp/basix/finite-element.cpp @@ -1436,7 +1436,7 @@ FiniteElement::FiniteElement( for (std::size_t col = 0; col < matM.extent(1); ++col) { F v = col == row ? 1.0 : 0.0; - constexpr F eps = 100 * std::numeric_limits::epsilon(); + const F eps = 10 * degree * degree * std::numeric_limits::epsilon(); if (std::abs(matM(row, col) - v) > eps) { _interpolation_is_identity = false; From eec248988cc3ff5f07b7226b9423a0255481f6fb Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Fri, 15 Nov 2024 22:09:39 +0000 Subject: [PATCH 03/13] Major speed improvement --- cpp/basix/dof-transformations.cpp | 152 +++++++++++++++++------------- cpp/basix/finite-element.cpp | 1 + test/test_lagrange.py | 18 +++- 3 files changed, 102 insertions(+), 69 deletions(-) diff --git a/cpp/basix/dof-transformations.cpp b/cpp/basix/dof-transformations.cpp index f842ac5ca..e06caeb96 100644 --- a/cpp/basix/dof-transformations.cpp +++ b/cpp/basix/dof-transformations.cpp @@ -13,6 +13,8 @@ #include #include +#include + using namespace basix; namespace stdex @@ -39,8 +41,7 @@ int find_first_subentity(cell::type cell_type, cell::type entity_type) { const int edim = cell::topological_dimension(entity_type); std::vector entities = cell::subentity_types(cell_type)[edim]; - if (auto it = std::ranges::find(entities, entity_type); - it != entities.end()) + if (auto it = std::ranges::find(entities, entity_type); it != entities.end()) { return std::distance(entities.begin(), it); } @@ -93,9 +94,7 @@ mapinfo_t get_mapinfo(cell::type cell_type) { mapinfo_t mapinfo; auto& data = mapinfo.try_emplace(cell::type::interval).first->second; - auto map = [](auto pt) -> std::array { - return {pt[1], pt[0], 0.0}; - }; + auto map = [](auto pt) -> std::array { return {pt[1], pt[0], 0.0}; }; stdex::mdarray> J( stdex::extents{}, {0., 1., 1., 0.}); @@ -109,9 +108,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) { mapinfo_t mapinfo; auto& data = mapinfo.try_emplace(cell::type::interval).first->second; - auto map = [](auto pt) -> std::array { - return {1 - pt[0], pt[1], 0}; - }; + auto map + = [](auto pt) -> std::array { return {1 - pt[0], pt[1], 0}; }; stdex::mdarray> J( stdex::extents{}, {-1., 0., 0., 1.}); @@ -126,9 +124,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) mapinfo_t mapinfo; { auto& data = mapinfo.try_emplace(cell::type::interval).first->second; - auto map = [](auto pt) -> std::array { - return {pt[0], pt[2], pt[1]}; - }; + auto map + = [](auto pt) -> std::array { return {pt[0], pt[2], pt[1]}; }; stdex::mdarray> J( stdex::extents{}, {1., 0., 0., 0., 0., 1., 0., 1., 0.}); @@ -143,9 +140,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) { auto& data = mapinfo.try_emplace(cell::type::triangle).first->second; { - auto map = [](auto pt) -> std::array { - return {pt[2], pt[0], pt[1]}; - }; + auto map + = [](auto pt) -> std::array { return {pt[2], pt[0], pt[1]}; }; stdex::mdarray> J( stdex::extents{}, {0., 1., 0., 0., 0., 1., 1., 0., 0.}); @@ -157,9 +153,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) data.push_back(std::tuple(map, J, detJ, K)); } { - auto map = [](auto pt) -> std::array { - return {pt[0], pt[2], pt[1]}; - }; + auto map + = [](auto pt) -> std::array { return {pt[0], pt[2], pt[1]}; }; stdex::mdarray> J( stdex::extents{}, {1., 0., 0., 0., 0., 1., 0., 1., 0.}); @@ -179,9 +174,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) mapinfo_t mapinfo; { auto& data = mapinfo.try_emplace(cell::type::interval).first->second; - auto map = [](auto pt) -> std::array { - return {1 - pt[0], pt[1], pt[2]}; - }; + auto map = [](auto pt) -> std::array + { return {1 - pt[0], pt[1], pt[2]}; }; stdex::mdarray> J( stdex::extents{}, {-1., 0., 0., 0., 1., 0., 0., 0., 1.}); @@ -196,9 +190,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) { auto& data = mapinfo.try_emplace(cell::type::quadrilateral).first->second; { - auto map = [](auto pt) -> std::array { - return {1 - pt[1], pt[0], pt[2]}; - }; + auto map = [](auto pt) -> std::array + { return {1 - pt[1], pt[0], pt[2]}; }; stdex::mdarray> J( stdex::extents{}, {0., 1., 0., -1., 0., 0., 0., 0., 1.}); @@ -210,9 +203,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) data.push_back(std::tuple(map, J, detJ, K)); } { - auto map = [](auto pt) -> std::array { - return {pt[1], pt[0], pt[2]}; - }; + auto map + = [](auto pt) -> std::array { return {pt[1], pt[0], pt[2]}; }; stdex::mdarray> J( stdex::extents{}, {0., 1., 0., 1., 0., 0., 0., 0., 1.}); @@ -231,9 +223,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) mapinfo_t mapinfo; { auto& data = mapinfo.try_emplace(cell::type::interval).first->second; - auto map = [](auto pt) -> std::array { - return {1 - pt[0], pt[1], pt[2]}; - }; + auto map = [](auto pt) -> std::array + { return {1 - pt[0], pt[1], pt[2]}; }; stdex::mdarray> J( stdex::extents{}, {-1., 0., 0., 0., 1., 0., 0., 0., 1.}); @@ -246,9 +237,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) { auto& data = mapinfo.try_emplace(cell::type::triangle).first->second; { - auto map = [](auto pt) -> std::array { - return {1 - pt[1] - pt[0], pt[0], pt[2]}; - }; + auto map = [](auto pt) -> std::array + { return {1 - pt[1] - pt[0], pt[0], pt[2]}; }; stdex::mdarray> J( stdex::extents{}, {0., 1., 0., -1., -1., 0., 0., 0., 1.}); @@ -259,9 +249,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) data.push_back(std::tuple(map, J, detJ, K)); } { - auto map = [](auto pt) -> std::array { - return {pt[1], pt[0], pt[2]}; - }; + auto map + = [](auto pt) -> std::array { return {pt[1], pt[0], pt[2]}; }; stdex::mdarray> J( stdex::extents{}, {0., 1., 0., 1., 0., 0., 0., 0., 1.}); @@ -275,9 +264,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) { auto& data = mapinfo.try_emplace(cell::type::quadrilateral).first->second; { - auto map = [](auto pt) -> std::array { - return {1 - pt[2], pt[1], pt[0]}; - }; + auto map = [](auto pt) -> std::array + { return {1 - pt[2], pt[1], pt[0]}; }; stdex::mdarray> J( stdex::extents{}, {0., 0., 1., 0., 1., 0., -1., 0., 0.}); @@ -288,9 +276,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) data.push_back(std::tuple(map, J, detJ, K)); } { // scope - auto map = [](auto pt) -> std::array { - return {pt[2], pt[1], pt[0]}; - }; + auto map + = [](auto pt) -> std::array { return {pt[2], pt[1], pt[0]}; }; stdex::mdarray> J( stdex::extents{}, {0., 0., 1., 0., 1., 0., 1., 0., 0.}); @@ -309,9 +296,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) mapinfo_t mapinfo; { auto& data = mapinfo.try_emplace(cell::type::interval).first->second; - auto map = [](auto pt) -> std::array { - return {1 - pt[0], pt[1], pt[2]}; - }; + auto map = [](auto pt) -> std::array + { return {1 - pt[0], pt[1], pt[2]}; }; stdex::mdarray> J( stdex::extents{}, {-1., 0., 0., 0., 1., 0., 0., 0., 1.}); @@ -325,9 +311,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) { auto& data = mapinfo.try_emplace(cell::type::quadrilateral).first->second; { - auto map = [](auto pt) -> std::array { - return {1 - pt[1], pt[0], pt[2]}; - }; + auto map = [](auto pt) -> std::array + { return {1 - pt[1], pt[0], pt[2]}; }; stdex::mdarray> J( stdex::extents{}, {0., 1., 0., -1., 0., 0., 0., 0., 1.}); @@ -338,9 +323,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) data.push_back(std::tuple(map, J, detJ, K)); } { - auto map = [](auto pt) -> std::array { - return {pt[1], pt[0], pt[2]}; - }; + auto map + = [](auto pt) -> std::array { return {pt[1], pt[0], pt[2]}; }; stdex::mdarray> J( stdex::extents{}, {0., 1., 0., 1., 0., 0., 0., 0., 1.}); @@ -355,9 +339,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) { auto& data = mapinfo.try_emplace(cell::type::triangle).first->second; { - auto map = [](auto pt) -> std::array { - return {1 - pt[2] - pt[0], pt[1], pt[0]}; - }; + auto map = [](auto pt) -> std::array + { return {1 - pt[2] - pt[0], pt[1], pt[0]}; }; stdex::mdarray> J( stdex::extents{}, {0., 0., 1., 0., 1., 0., -1., 0., -1.}); @@ -368,9 +351,8 @@ mapinfo_t get_mapinfo(cell::type cell_type) data.push_back(std::tuple(map, J, detJ, K)); } { - auto map = [](auto pt) -> std::array { - return {pt[2], pt[1], pt[0]}; - }; + auto map + = [](auto pt) -> std::array { return {pt[2], pt[1], pt[0]}; }; stdex::mdarray> J( stdex::extents{}, {0., 0., 1., 0., 1., 0., 1., 0., 0.}); @@ -426,7 +408,7 @@ std::pair, std::array> compute_transformation( mdarray_t mapped_pts(pts.extents()); for (std::size_t p = 0; p < mapped_pts.extent(0); ++p) { - auto mp = map_point( + std::array mp = map_point( std::span(pts.data_handle() + p * pts.extent(1), pts.extent(1))); for (std::size_t k = 0; k < mapped_pts.extent(1); ++k) mapped_pts(p, k) = mp[k]; @@ -439,18 +421,56 @@ std::pair, std::array> compute_transformation( mdspan_t polyset_vals(polyset_vals_b.data(), polyset_shape[1], polyset_shape[2]); - mdarray_t tabulated_data(npts, total_ndofs, vs); + std::vector tabulated_data_b(npts * total_ndofs * vs); + mdspan_t tabulated_data(tabulated_data_b.data(), npts, total_ndofs, vs); + // mdarray_t tabulated_data(npts, total_ndofs, vs); + + // std::vector result_b(polyset_vals.extent(1) * coeffs.extent(0)); + // mdspan_t result(result_b.data(), polyset_vals.extent(1), + // coeffs.extent(0)); + + std::vector resultT_b(polyset_vals.extent(1) * coeffs.extent(0)); + mdspan_t resultT(resultT_b.data(), coeffs.extent(0), + polyset_vals.extent(1)); + // mdarray_t result(polyset_vals.extent(1), coeffs.extent(0)); + + // std::cout << "3: transform: " << vs << ", " << coeffs.extent(0) << ", " + // << polyset_vals.extent(1) << ", " << polyset_vals.extent(0) + // << std::endl; + + std::vector coeffs_b(coeffs.extent(0) * polyset_vals.extent(0)); + mdspan_t _coeffs(coeffs_b.data(), coeffs.extent(0), + polyset_vals.extent(0)); for (std::size_t j = 0; j < vs; ++j) { - mdarray_t result(polyset_vals.extent(1), coeffs.extent(0)); + // std::fill(result_b.begin(), result_b.end(), 0); + for (std::size_t k0 = 0; k0 < coeffs.extent(0); ++k0) - for (std::size_t k1 = 0; k1 < polyset_vals.extent(1); ++k1) - for (std::size_t k2 = 0; k2 < polyset_vals.extent(0); ++k2) - result(k1, k0) += coeffs(k0, k2 + psize * j) * polyset_vals(k2, k1); + for (std::size_t k2 = 0; k2 < polyset_vals.extent(0); ++k2) + _coeffs(k0, k2) = coeffs(k0, k2 + psize * j); + + // R^T = coeffs * polyset_vals + // for (std::size_t k0 = 0; k0 < coeffs.extent(0); ++k0) // big + // for (std::size_t k1 = 0; k1 < polyset_vals.extent(1); ++k1) + // for (std::size_t k2 = 0; k2 < polyset_vals.extent(0); ++k2) // big + // result(k1, k0) += _coeffs(k0, k2) * polyset_vals(k2, k1); + // result(k1, k0) += coeffs(k0, k2 + psize * j) * polyset_vals(k2, k1); + + // r^t: coeffs.extent(0) x polyset_vals.extent(1) [k0, k1] + // c: coeffs.extent(1) x polyset_vals.extent(0) [k0, k2] + // p: polyset_vals.extent(0) x polyset_vals.extent(1) [k2, k1] + + math::dot(_coeffs, polyset_vals, resultT); + + // std::cout << "4: transform" << std::endl; + + for (std::size_t k0 = 0; k0 < resultT.extent(1); ++k0) + for (std::size_t k1 = 0; k1 < resultT.extent(0); ++k1) + tabulated_data(k0, k1, j) = resultT(k1, k0); - for (std::size_t k0 = 0; k0 < result.extent(0); ++k0) - for (std::size_t k1 = 0; k1 < result.extent(1); ++k1) - tabulated_data(k0, k1, j) = result(k0, k1); + // for (std::size_t k0 = 0; k0 < result.extent(0); ++k0) + // for (std::size_t k1 = 0; k1 < result.extent(1); ++k1) + // tabulated_data(k0, k1, j) = result(k0, k1); } // push forward @@ -460,7 +480,7 @@ std::pair, std::array> compute_transformation( for (std::size_t i = 0; i < npts; ++i) { mdspan_t tab( - tabulated_data.data() + tabulated_data_b.data() + i * tabulated_data.extent(1) * tabulated_data.extent(2), tabulated_data.extent(1), tabulated_data.extent(2)); diff --git a/cpp/basix/finite-element.cpp b/cpp/basix/finite-element.cpp index f2da5a08e..411a61fd2 100644 --- a/cpp/basix/finite-element.cpp +++ b/cpp/basix/finite-element.cpp @@ -23,6 +23,7 @@ #include #include #include + #define str_macro(X) #X #define str(X) str_macro(X) diff --git a/test/test_lagrange.py b/test/test_lagrange.py index 0d5da1ac2..ca0f4bc35 100644 --- a/test/test_lagrange.py +++ b/test/test_lagrange.py @@ -951,11 +951,23 @@ def test_dof_transformations_tetrahedron(degree): @pytest.mark.parametrize( "celltype", - [basix.CellType.interval, basix.CellType.triangle, basix.CellType.tetrahedron], + [ + basix.CellType.interval, + basix.CellType.triangle, + basix.CellType.tetrahedron, + basix.CellType.quadrilateral, + basix.CellType.hexahedron, + ], +) +@pytest.mark.parametrize( + "dtype", + [ + np.float32, + np.float64, + ], ) -@pytest.mark.parametrize("dtype", [np.float32, np.float64]) def test_lagrange_transform(celltype, dtype): - for p in range(1, 15): + for p in range(1, 12): e = basix.create_element( basix.ElementFamily.P, celltype, p, basix.LagrangeVariant.gll_warped, dtype=dtype ) From ff1dfaa7f49f607eb1203e5930effb4ad507dc0e Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Fri, 15 Nov 2024 22:53:23 +0000 Subject: [PATCH 04/13] Tidy up --- cpp/basix/dof-transformations.cpp | 32 +++++++++++++------------------ cpp/basix/math.h | 6 ++++++ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/cpp/basix/dof-transformations.cpp b/cpp/basix/dof-transformations.cpp index e06caeb96..03b4a77cf 100644 --- a/cpp/basix/dof-transformations.cpp +++ b/cpp/basix/dof-transformations.cpp @@ -423,21 +423,9 @@ std::pair, std::array> compute_transformation( std::vector tabulated_data_b(npts * total_ndofs * vs); mdspan_t tabulated_data(tabulated_data_b.data(), npts, total_ndofs, vs); - // mdarray_t tabulated_data(npts, total_ndofs, vs); - - // std::vector result_b(polyset_vals.extent(1) * coeffs.extent(0)); - // mdspan_t result(result_b.data(), polyset_vals.extent(1), - // coeffs.extent(0)); - - std::vector resultT_b(polyset_vals.extent(1) * coeffs.extent(0)); - mdspan_t resultT(resultT_b.data(), coeffs.extent(0), - polyset_vals.extent(1)); - // mdarray_t result(polyset_vals.extent(1), coeffs.extent(0)); - - // std::cout << "3: transform: " << vs << ", " << coeffs.extent(0) << ", " - // << polyset_vals.extent(1) << ", " << polyset_vals.extent(0) - // << std::endl; - + std::vector result_b(polyset_vals.extent(1) * coeffs.extent(0)); + mdspan_t result(result_b.data(), coeffs.extent(0), + polyset_vals.extent(1)); std::vector coeffs_b(coeffs.extent(0) * polyset_vals.extent(0)); mdspan_t _coeffs(coeffs_b.data(), coeffs.extent(0), polyset_vals.extent(0)); @@ -460,13 +448,13 @@ std::pair, std::array> compute_transformation( // c: coeffs.extent(1) x polyset_vals.extent(0) [k0, k2] // p: polyset_vals.extent(0) x polyset_vals.extent(1) [k2, k1] - math::dot(_coeffs, polyset_vals, resultT); + math::dot(_coeffs, polyset_vals, result); // std::cout << "4: transform" << std::endl; - for (std::size_t k0 = 0; k0 < resultT.extent(1); ++k0) - for (std::size_t k1 = 0; k1 < resultT.extent(0); ++k1) - tabulated_data(k0, k1, j) = resultT(k1, k0); + for (std::size_t k0 = 0; k0 < result.extent(1); ++k0) + for (std::size_t k1 = 0; k1 < result.extent(0); ++k1) + tabulated_data(k0, k1, j) = result(k1, k0); // for (std::size_t k0 = 0; k0 < result.extent(0); ++k0) // for (std::size_t k1 = 0; k1 < result.extent(1); ++k1) @@ -502,10 +490,16 @@ std::pair, std::array> compute_transformation( for (std::size_t i = 0; i < vs; ++i) { for (std::size_t k0 = 0; k0 < transform.extent(1); ++k0) + { for (std::size_t k1 = 0; k1 < transform.extent(0); ++k1) + { for (std::size_t k2 = 0; k2 < imat.extent(2); ++k2) + { transform(k1, k0) += imat(k0, i, k2, d) * pushed_data(k2, k1 + dofstart, i); + } + } + } } } diff --git a/cpp/basix/math.h b/cpp/basix/math.h index 82cb86ff9..1199941b3 100644 --- a/cpp/basix/math.h +++ b/cpp/basix/math.h @@ -330,6 +330,12 @@ void dot(const U& A, const V& B, W&& C) } else { + static_assert(std::is_same_v); + static_assert(std::is_same_v); + // static_assert( + // std::is_same_v, + // std::layout_right>); + using T = typename std::decay_t::value_type; impl::dot_blas( std::span(A.data_handle(), A.size()), {A.extent(0), A.extent(1)}, From 6260efa4dfcd631f7e356511828e05bd157822ee Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Sat, 16 Nov 2024 09:49:59 +0000 Subject: [PATCH 05/13] Add assertions --- cpp/basix/math.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/cpp/basix/math.h b/cpp/basix/math.h index 1199941b3..074f81391 100644 --- a/cpp/basix/math.h +++ b/cpp/basix/math.h @@ -6,6 +6,7 @@ #pragma once +#include #include #include #include @@ -330,13 +331,22 @@ void dot(const U& A, const V& B, W&& C) } else { - static_assert(std::is_same_v); - static_assert(std::is_same_v); - // static_assert( + static_assert(std::is_same_v::layout_type, + std::layout_right>); + static_assert(std::is_same_v::layout_type, + std::layout_right>); + static_assert(std::is_same_v::layout_type, + std::layout_right>); + + // static_assert(std::is_same_v); static_assert(std::is_same_v); static_assert( // std::is_same_v, // std::layout_right>); using T = typename std::decay_t::value_type; + // static_assert(std::is_same_v::value_type, T>); + // static_assert(std::is_same_v::value_type, T>); impl::dot_blas( std::span(A.data_handle(), A.size()), {A.extent(0), A.extent(1)}, std::span(B.data_handle(), B.size()), {B.extent(0), B.extent(1)}, From 869189c6439798bd7216590f22ad778d1a9daa4e Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Sat, 16 Nov 2024 09:55:04 +0000 Subject: [PATCH 06/13] Assert of float type --- cpp/basix/math.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/basix/math.h b/cpp/basix/math.h index 074f81391..aaffc79d4 100644 --- a/cpp/basix/math.h +++ b/cpp/basix/math.h @@ -345,8 +345,8 @@ void dot(const U& A, const V& B, W&& C) // std::layout_right>); using T = typename std::decay_t::value_type; - // static_assert(std::is_same_v::value_type, T>); - // static_assert(std::is_same_v::value_type, T>); + static_assert(std::is_same_v::value_type, T>); + static_assert(std::is_same_v::value_type, T>); impl::dot_blas( std::span(A.data_handle(), A.size()), {A.extent(0), A.extent(1)}, std::span(B.data_handle(), B.size()), {B.extent(0), B.extent(1)}, From f183dacea14998c498a1eccfe551ff9b3fb6cc5b Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Sat, 16 Nov 2024 10:05:54 +0000 Subject: [PATCH 07/13] Update assert --- cpp/basix/math.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/basix/math.h b/cpp/basix/math.h index aaffc79d4..851214204 100644 --- a/cpp/basix/math.h +++ b/cpp/basix/math.h @@ -331,12 +331,14 @@ void dot(const U& A, const V& B, W&& C) } else { + // #if defined(_MSC_VER) && !defined(__clang__) static_assert(std::is_same_v::layout_type, - std::layout_right>); + MDSPAN_IMPL_STANDARD_NAMESPACE::layout_left>); static_assert(std::is_same_v::layout_type, - std::layout_right>); + MDSPAN_IMPL_STANDARD_NAMESPACE::layout_left>); static_assert(std::is_same_v::layout_type, - std::layout_right>); + MDSPAN_IMPL_STANDARD_NAMESPACE::layout_left>); + // #endif // static_assert(std::is_same_v); static_assert(std::is_same_v Date: Sat, 16 Nov 2024 10:33:24 +0000 Subject: [PATCH 08/13] Assert fix --- cpp/basix/math.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/basix/math.h b/cpp/basix/math.h index 851214204..81be508b2 100644 --- a/cpp/basix/math.h +++ b/cpp/basix/math.h @@ -333,11 +333,11 @@ void dot(const U& A, const V& B, W&& C) { // #if defined(_MSC_VER) && !defined(__clang__) static_assert(std::is_same_v::layout_type, - MDSPAN_IMPL_STANDARD_NAMESPACE::layout_left>); + MDSPAN_IMPL_STANDARD_NAMESPACE::layout_right>); static_assert(std::is_same_v::layout_type, - MDSPAN_IMPL_STANDARD_NAMESPACE::layout_left>); + MDSPAN_IMPL_STANDARD_NAMESPACE::layout_right>); static_assert(std::is_same_v::layout_type, - MDSPAN_IMPL_STANDARD_NAMESPACE::layout_left>); + MDSPAN_IMPL_STANDARD_NAMESPACE::layout_right>); // #endif // static_assert(std::is_same_v Date: Sat, 16 Nov 2024 12:20:01 +0000 Subject: [PATCH 09/13] Disable static check for MSVC --- cpp/basix/math.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/cpp/basix/math.h b/cpp/basix/math.h index 81be508b2..b859f9572 100644 --- a/cpp/basix/math.h +++ b/cpp/basix/math.h @@ -331,21 +331,14 @@ void dot(const U& A, const V& B, W&& C) } else { - // #if defined(_MSC_VER) && !defined(__clang__) +#if defined(_MSC_VER) && !defined(__clang__) static_assert(std::is_same_v::layout_type, MDSPAN_IMPL_STANDARD_NAMESPACE::layout_right>); static_assert(std::is_same_v::layout_type, MDSPAN_IMPL_STANDARD_NAMESPACE::layout_right>); static_assert(std::is_same_v::layout_type, MDSPAN_IMPL_STANDARD_NAMESPACE::layout_right>); - // #endif - - // static_assert(std::is_same_v); static_assert(std::is_same_v); static_assert( - // std::is_same_v, - // std::layout_right>); - +#endif using T = typename std::decay_t::value_type; static_assert(std::is_same_v::value_type, T>); static_assert(std::is_same_v::value_type, T>); From 55422e2d38c6626861286ce04be0b884d6bfe6fb Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Sat, 16 Nov 2024 13:09:58 +0000 Subject: [PATCH 10/13] Fix --- cpp/basix/math.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/basix/math.h b/cpp/basix/math.h index b859f9572..c0b1aa4e8 100644 --- a/cpp/basix/math.h +++ b/cpp/basix/math.h @@ -331,7 +331,7 @@ void dot(const U& A, const V& B, W&& C) } else { -#if defined(_MSC_VER) && !defined(__clang__) +#ifndef _MSVC_LANG static_assert(std::is_same_v::layout_type, MDSPAN_IMPL_STANDARD_NAMESPACE::layout_right>); static_assert(std::is_same_v::layout_type, From ff71810ba8b114cc0b6904efabe8951d5b5c2012 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Sat, 16 Nov 2024 17:46:27 +0000 Subject: [PATCH 11/13] Generalise mat-mat product --- cpp/basix/dof-transformations.cpp | 46 +++++++++++++++++++++---------- cpp/basix/math.h | 39 ++++++++++++++++---------- 2 files changed, 56 insertions(+), 29 deletions(-) diff --git a/cpp/basix/dof-transformations.cpp b/cpp/basix/dof-transformations.cpp index 03b4a77cf..706d18cfd 100644 --- a/cpp/basix/dof-transformations.cpp +++ b/cpp/basix/dof-transformations.cpp @@ -94,11 +94,11 @@ mapinfo_t get_mapinfo(cell::type cell_type) { mapinfo_t mapinfo; auto& data = mapinfo.try_emplace(cell::type::interval).first->second; - auto map = [](auto pt) -> std::array { return {pt[1], pt[0], 0.0}; }; + auto map = [](auto pt) -> std::array { return {pt[1], pt[0], 0}; }; stdex::mdarray> J( stdex::extents{}, {0., 1., 1., 0.}); - T detJ = -1.0; + T detJ = -1; stdex::mdarray> K( stdex::extents{}, {0., 1., 1., 0.}); data.push_back(std::tuple(map, J, detJ, K)); @@ -485,24 +485,42 @@ std::pair, std::array> compute_transformation( // Interpolate to calculate coefficients std::vector transformb(ndofs * ndofs); mdspan_t transform(transformb.data(), ndofs, ndofs); - for (std::size_t d = 0; d < imat.extent(3); ++d) + + std::vector imat_b(transform.extent(1) * imat.extent(2)); + mdspan_t imat_id(imat_b.data(), transform.extent(1), imat.extent(2)); + + std::vector pushed_datai_b(imat.extent(2) * transform.extent(0)); + mdspan_t pushed_data_i(pushed_datai_b.data(), imat.extent(2), + transform.extent(0)); + + std::vector transformT_b(transform.extent(1) * transform.extent(0)); + mdspan_t transformT(transformT_b.data(), transform.extent(1), + transform.extent(0)); + for (std::size_t i = 0; i < vs; ++i) { - for (std::size_t i = 0; i < vs; ++i) + // Pack pushed_data + for (std::size_t k2 = 0; k2 < imat.extent(2); ++k2) + for (std::size_t k1 = 0; k1 < transform.extent(0); ++k1) + pushed_data_i(k2, k1) = pushed_data(k2, k1 + dofstart, i); + + for (std::size_t d = 0; d < imat.extent(3); ++d) { + // Pack imat for (std::size_t k0 = 0; k0 < transform.extent(1); ++k0) - { - for (std::size_t k1 = 0; k1 < transform.extent(0); ++k1) - { - for (std::size_t k2 = 0; k2 < imat.extent(2); ++k2) - { - transform(k1, k0) - += imat(k0, i, k2, d) * pushed_data(k2, k1 + dofstart, i); - } - } - } + for (std::size_t k2 = 0; k2 < imat.extent(2); ++k2) + imat_id(k0, k2) = imat(k0, i, k2, d); + + // transformT_(k0, k1) = imat_id_(k0, k2) pushed_data_i(k2, k1) + + // transformT_(k0, k1) + math::dot(imat_id, pushed_data_i, transformT, 1, 1); } } + // Transpose 'transformT' -> 'transform' + for (std::size_t k0 = 0; k0 < transform.extent(1); ++k0) + for (std::size_t k1 = 0; k1 < transform.extent(0); ++k1) + transform(k1, k0) = transformT(k0, k1); + return {std::move(transformb), {transform.extent(0), transform.extent(1)}}; } } // namespace diff --git a/cpp/basix/math.h b/cpp/basix/math.h index c0b1aa4e8..9fe053880 100644 --- a/cpp/basix/math.h +++ b/cpp/basix/math.h @@ -51,14 +51,15 @@ namespace basix::math { namespace impl { -/// @brief Compute C = A * B using BLAS. +/// @brief Compute C = alpha A * B + \beta C using BLAS (GEMM). /// @param[in] A Input matrix /// @param[in] B Input matrix -/// @return A * B +/// @param[in] alpha +/// @param[in] beta template void dot_blas(std::span A, std::array Ashape, std::span B, std::array Bshape, - std::span C) + std::span C, T alpha = 1, T beta = 0) { static_assert(std::is_same_v or std::is_same_v); @@ -69,8 +70,6 @@ void dot_blas(std::span A, std::array Ashape, int N = Bshape[1]; int K = Ashape[1]; - T alpha = 1; - T beta = 0; int lda = K; int ldb = N; int ldc = N; @@ -310,42 +309,52 @@ transpose_lu(std::pair, std::array>& A) return perm; } -/// @brief Compute C = A * B. +/// @brief Compute C = \alpha A * B + \beta C /// @param[in] A Input matrix /// @param[in] B Input matrix /// @param[out] C Output matrix. Must be sized correctly before calling /// this function. +/// @param[in] alpha +/// @param[in] beta template -void dot(const U& A, const V& B, W&& C) +void dot(const U& A, const V& B, W&& C, + typename std::decay_t::value_type alpha = 1, + typename std::decay_t::value_type beta = 0) { + using T = typename std::decay_t::value_type; + assert(A.extent(1) == B.extent(0)); assert(C.extent(0) == A.extent(0)); assert(C.extent(1) == B.extent(1)); if (A.extent(0) * B.extent(1) * A.extent(1) < 512) { - std::fill_n(C.data_handle(), C.extent(0) * C.extent(1), 0); for (std::size_t i = 0; i < A.extent(0); ++i) + { for (std::size_t j = 0; j < B.extent(1); ++j) + { + T C0 = C(i, j); + C(i, j) = 0; + T& _C = C(i, j); for (std::size_t k = 0; k < A.extent(1); ++k) - C(i, j) += A(i, k) * B(k, j); + _C += alpha * A(i, k) * B(k, j); + _C = alpha * _C + beta * C0; + } + } } else { -#ifndef _MSVC_LANG static_assert(std::is_same_v::layout_type, MDSPAN_IMPL_STANDARD_NAMESPACE::layout_right>); static_assert(std::is_same_v::layout_type, MDSPAN_IMPL_STANDARD_NAMESPACE::layout_right>); static_assert(std::is_same_v::layout_type, MDSPAN_IMPL_STANDARD_NAMESPACE::layout_right>); -#endif - using T = typename std::decay_t::value_type; static_assert(std::is_same_v::value_type, T>); static_assert(std::is_same_v::value_type, T>); impl::dot_blas( std::span(A.data_handle(), A.size()), {A.extent(0), A.extent(1)}, std::span(B.data_handle(), B.size()), {B.extent(0), B.extent(1)}, - std::span(C.data_handle(), C.size())); + std::span(C.data_handle(), C.size()), alpha, beta); } } @@ -386,8 +395,8 @@ void orthogonalise( norm = std::sqrt(norm); if (norm < 2 * std::numeric_limits::epsilon()) { - throw std::runtime_error( - "Cannot orthogonalise the rows of a matrix with incomplete row rank"); + throw std::runtime_error("Cannot orthogonalise the rows of a matrix " + "with incomplete row rank"); } for (std::size_t k = 0; k < wcoeffs.extent(1); ++k) From 3953b42f40b6ba2943138ca85f3d4299b55e5048 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Sat, 16 Nov 2024 18:38:31 +0000 Subject: [PATCH 12/13] Generalise GEMM --- cpp/basix/dof-transformations.cpp | 81 +++++++++++-------------------- cpp/basix/dof-transformations.h | 10 ++-- cpp/basix/math.h | 55 ++++++++++----------- 3 files changed, 63 insertions(+), 83 deletions(-) diff --git a/cpp/basix/dof-transformations.cpp b/cpp/basix/dof-transformations.cpp index 706d18cfd..42793a841 100644 --- a/cpp/basix/dof-transformations.cpp +++ b/cpp/basix/dof-transformations.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2020 Chris Richardson & Matthew Scroggs +// Copyright (c) 2020-2024 Chris Richardson, Matthew Scroggs and Garth N. Wells // FEniCS Project // SPDX-License-Identifier: MIT @@ -13,10 +13,10 @@ #include #include -#include - using namespace basix; +namespace +{ namespace stdex = MDSPAN_IMPL_STANDARD_NAMESPACE::MDSPAN_IMPL_PROPOSED_NAMESPACE; template @@ -34,17 +34,13 @@ using map_data_t template using mapinfo_t = std::map>>; -namespace -{ //----------------------------------------------------------------------------- int find_first_subentity(cell::type cell_type, cell::type entity_type) { const int edim = cell::topological_dimension(entity_type); std::vector entities = cell::subentity_types(cell_type)[edim]; if (auto it = std::ranges::find(entities, entity_type); it != entities.end()) - { return std::distance(entities.begin(), it); - } else throw std::runtime_error("Entity not found"); } @@ -373,13 +369,12 @@ mapinfo_t get_mapinfo(cell::type cell_type) //----------------------------------------------------------------------------- template std::pair, std::array> compute_transformation( - cell::type cell_type, - const std::array>, 4>& x, - const std::array>, 4>& M, + cell::type cell_type, std::array>, 4> x, + std::array>, 4> M, mdspan_t coeffs, const mdarray_t& J, T detJ, const mdarray_t& K, - const std::function(std::span)> map_point, - int degree, int tdim, int entity, std::size_t vs, const maps::type map_type, + std::function(std::span)> map_point, int degree, + int tdim, int entity, std::size_t vs, const maps::type map_type, const polyset::type ptype) { if (x[tdim].size() == 0 or x[tdim][entity].extent(0) == 0) @@ -431,34 +426,17 @@ std::pair, std::array> compute_transformation( polyset_vals.extent(0)); for (std::size_t j = 0; j < vs; ++j) { - // std::fill(result_b.begin(), result_b.end(), 0); - for (std::size_t k0 = 0; k0 < coeffs.extent(0); ++k0) for (std::size_t k2 = 0; k2 < polyset_vals.extent(0); ++k2) _coeffs(k0, k2) = coeffs(k0, k2 + psize * j); - // R^T = coeffs * polyset_vals - // for (std::size_t k0 = 0; k0 < coeffs.extent(0); ++k0) // big - // for (std::size_t k1 = 0; k1 < polyset_vals.extent(1); ++k1) - // for (std::size_t k2 = 0; k2 < polyset_vals.extent(0); ++k2) // big - // result(k1, k0) += _coeffs(k0, k2) * polyset_vals(k2, k1); - // result(k1, k0) += coeffs(k0, k2 + psize * j) * polyset_vals(k2, k1); - // r^t: coeffs.extent(0) x polyset_vals.extent(1) [k0, k1] // c: coeffs.extent(1) x polyset_vals.extent(0) [k0, k2] // p: polyset_vals.extent(0) x polyset_vals.extent(1) [k2, k1] - math::dot(_coeffs, polyset_vals, result); - - // std::cout << "4: transform" << std::endl; - for (std::size_t k0 = 0; k0 < result.extent(1); ++k0) for (std::size_t k1 = 0; k1 < result.extent(0); ++k1) tabulated_data(k0, k1, j) = result(k1, k0); - - // for (std::size_t k0 = 0; k0 < result.extent(0); ++k0) - // for (std::size_t k1 = 0; k1 < result.extent(1); ++k1) - // tabulated_data(k0, k1, j) = result(k0, k1); } // push forward @@ -471,11 +449,9 @@ std::pair, std::array> compute_transformation( tabulated_data_b.data() + i * tabulated_data.extent(1) * tabulated_data.extent(2), tabulated_data.extent(1), tabulated_data.extent(2)); - push_forward(map_type, mdspan_t(temp_data.data(), temp_data.extents()), tab, J, detJ, K); - for (std::size_t k0 = 0; k0 < temp_data.extent(0); ++k0) for (std::size_t k1 = 0; k1 < temp_data.extent(1); ++k1) pushed_data(i, k0, k1) = temp_data(k0, k1); @@ -529,14 +505,16 @@ template std::map, std::array>> doftransforms::compute_entity_transformations( cell::type cell_type, - const std::array< + std::array< std::vector>>, - 4>& x, - const std::array< + 4> + x, + std::array< std::vector>>, - 4>& M, + 4> + M, MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< const T, MDSPAN_IMPL_STANDARD_NAMESPACE::dextents> coeffs, @@ -550,7 +528,6 @@ doftransforms::compute_entity_transformations( const int tdim = cell::topological_dimension(entity_type); const int entity = find_first_subentity(cell_type, entity_type); std::size_t ndofs = M[tdim].size() == 0 ? 0 : M[tdim][entity].extent(0); - std::vector transform; transform.reserve(emap_data.size() * ndofs * ndofs); for (auto& [mapfn, J, detJ, K] : emap_data) @@ -574,14 +551,14 @@ template std::map, std::array>> doftransforms::compute_entity_transformations( cell::type, - const std::array>>, - 4>&, - const std::array>>, - 4>&, + std::array>>, + 4>, + std::array>>, + 4>, MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< const float, MDSPAN_IMPL_STANDARD_NAMESPACE::dextents>, int, std::size_t, maps::type, polyset::type); @@ -590,14 +567,14 @@ template std::map, std::array>> doftransforms::compute_entity_transformations( cell::type, - const std::array>>, - 4>&, - const std::array>>, - 4>&, + std::array>>, + 4>, + std::array>>, + 4>, MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< const double, MDSPAN_IMPL_STANDARD_NAMESPACE::dextents>, int, std::size_t, maps::type, polyset::type); diff --git a/cpp/basix/dof-transformations.h b/cpp/basix/dof-transformations.h index 3399c7397..596194606 100644 --- a/cpp/basix/dof-transformations.h +++ b/cpp/basix/dof-transformations.h @@ -40,14 +40,16 @@ template std::map, std::array>> compute_entity_transformations( cell::type cell_type, - const std::array< + std::array< std::vector>>, - 4>& x, - const std::array< + 4> + x, + std::array< std::vector>>, - 4>& M, + 4> + M, MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< const T, MDSPAN_IMPL_STANDARD_NAMESPACE::dextents> coeffs, diff --git a/cpp/basix/math.h b/cpp/basix/math.h index 9fe053880..ac6fff692 100644 --- a/cpp/basix/math.h +++ b/cpp/basix/math.h @@ -1,4 +1,4 @@ -// Copyright (C) 2021 Igor Baratta +// Copyright (C) 2021-2024 Igor Baratta and Garth N. Wells // // This file is part of DOLFINx (https://www.fenicsproject.org) // @@ -6,6 +6,7 @@ #pragma once +#include "mdspan.hpp" #include #include #include @@ -16,8 +17,6 @@ #include #include -#include "mdspan.hpp" - extern "C" { void ssyevd_(char* jobz, char* uplo, int* n, float* a, int* lda, float* w, @@ -45,15 +44,15 @@ extern "C" /// @brief Mathematical functions. /// -/// @note The functions in this namespace are designed to be called -/// multiple times at runtime, so their performance is critical. +/// @note Functions in this namespace are designed to be called multiple +/// times at runtime, so their performance is critical. namespace basix::math { namespace impl { /// @brief Compute C = alpha A * B + \beta C using BLAS (GEMM). -/// @param[in] A Input matrix -/// @param[in] B Input matrix +/// @param[in] A Input matrix. +/// @param[in] B Input matrix. /// @param[in] alpha /// @param[in] beta template @@ -89,8 +88,8 @@ void dot_blas(std::span A, std::array Ashape, } // namespace impl /// @brief Compute the outer product of vectors u and v. -/// @param u The first vector -/// @param v The second vector +/// @param u The first vector. +/// @param v The second vector. /// @return The outer product. The type will be the same as `u`. template std::pair, std::array> @@ -103,7 +102,7 @@ outer(const U& u, const V& v) return {std::move(result), {u.size(), v.size()}}; } -/// Compute the cross product u x v +/// @brief Compute the cross product u x v. /// @param u The first vector. It must has size 3. /// @param v The second vector. It must has size 3. /// @return The cross product `u x v`. The type will be the same as `u`. @@ -116,12 +115,13 @@ std::array cross(const U& u, const V& v) u[0] * v[1] - u[1] * v[0]}; } -/// Compute the eigenvalues and eigenvectors of a square Hermitian matrix A -/// @param[in] A Input matrix, row-major storage -/// @param[in] n Number of rows +/// @brief Compute the eigenvalues and eigenvectors of a square +/// Hermitian matrix A. +/// @param[in] A Input matrix, row-major storage. +/// @param[in] n Number of rows. /// @return Eigenvalues (0) and eigenvectors (1). The eigenvector array /// uses column-major storage, which each column being an eigenvector. -/// @pre The matrix `A` must be symmetric +/// @pre The matrix `A` must be symmetric. template std::pair, std::vector> eigh(std::span A, std::size_t n) @@ -179,9 +179,9 @@ std::pair, std::vector> eigh(std::span A, } /// @brief Solve A X = B. -/// @param[in] A The matrix -/// @param[in] B Right-hand side matrix/vector -/// @return A^{-1} B +/// @param[in] A The matrix. +/// @param[in] B Right-hand side matrix/vector.@brief +/// @return A^{-1} B. template std::vector solve(MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< @@ -209,6 +209,7 @@ solve(MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< int nrhs = _B.extent(1); int lda = _A.extent(0); int ldb = _B.extent(0); + // Pivot indices that define the permutation matrix for the LU solver std::vector piv(N); int info; @@ -231,9 +232,9 @@ solve(MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< return rb; } -/// @brief Check if A is a singular matrix, -/// @param[in] A The matrix -/// @return A bool indicating if the matrix is singular +/// @brief Check if A is a singular matrix. +/// @param[in] A The matrix. +/// @return A bool indicating if the matrix is singular. template bool is_singular( MDSPAN_IMPL_STANDARD_NAMESPACE::mdspan< @@ -277,9 +278,9 @@ bool is_singular( /// @brief Compute the LU decomposition of the transpose of a square /// matrix A. -/// @param[in,out] A The matrix +/// @param[in,out] A The matrix. /// @return The LU permutation, in prepared format (see -/// precompute::prepare_permutation) +/// precompute::prepare_permutation). template std::vector transpose_lu(std::pair, std::array>& A) @@ -326,7 +327,7 @@ void dot(const U& A, const V& B, W&& C, assert(A.extent(1) == B.extent(0)); assert(C.extent(0) == A.extent(0)); assert(C.extent(1) == B.extent(1)); - if (A.extent(0) * B.extent(1) * A.extent(1) < 512) + if (A.extent(0) * B.extent(1) * A.extent(1) < 256) { for (std::size_t i = 0; i < A.extent(0); ++i) { @@ -336,7 +337,7 @@ void dot(const U& A, const V& B, W&& C, C(i, j) = 0; T& _C = C(i, j); for (std::size_t k = 0; k < A.extent(1); ++k) - _C += alpha * A(i, k) * B(k, j); + _C += A(i, k) * B(k, j); _C = alpha * _C + beta * C0; } } @@ -359,8 +360,8 @@ void dot(const U& A, const V& B, W&& C, } /// @brief Build an identity matrix. -/// @param[in] n The number of rows/columns -/// @return Identity matrix using row-major storage +/// @param[in] n The number of rows/columns. +/// @return Identity matrix using row-major storage. template std::vector eye(std::size_t n) { @@ -376,7 +377,7 @@ std::vector eye(std::size_t n) } /// @brief Orthogonalise the rows of a matrix (in place). -/// @param[in] wcoeffs The matrix +/// @param[in] wcoeffs The matrix. /// @param[in] start The row to start from. The rows before this should /// already be orthogonal. template From f9867c9e85bcc42b7fd14d8ee31451b1e3620aa2 Mon Sep 17 00:00:00 2001 From: "Garth N. Wells" Date: Sat, 16 Nov 2024 18:52:38 +0000 Subject: [PATCH 13/13] Extend tests --- test/test_lagrange.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_lagrange.py b/test/test_lagrange.py index ca0f4bc35..4c7a2caf8 100644 --- a/test/test_lagrange.py +++ b/test/test_lagrange.py @@ -856,7 +856,7 @@ def test_tet(degree): (basix.CellType.tetrahedron, basix.CellType.tetrahedron), ], ) -@pytest.mark.parametrize("degree", [1, 2, 3, 4]) +@pytest.mark.parametrize("degree", [1, 2, 3, 4, 12]) def test_lagrange(celltype, degree): lagrange = basix.create_element( basix.ElementFamily.P, celltype[1], degree, basix.LagrangeVariant.equispaced @@ -866,7 +866,7 @@ def test_lagrange(celltype, degree): assert np.isclose(np.sum(w, axis=1), 1.0).all() -@pytest.mark.parametrize("degree", [1, 2, 3, 4]) +@pytest.mark.parametrize("degree", [1, 2, 3, 4, 12]) def test_dof_transformations_interval(degree): lagrange = basix.create_element( basix.ElementFamily.P, basix.CellType.interval, degree, basix.LagrangeVariant.equispaced