diff --git a/CHANGELOG.md b/CHANGELOG.md index addb279..c357d31 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- (#33) Add gudhi backend. + ### Fixed - (#30) Update `.readthedocs.yaml` and `docs/conf.py` to current RTDs requirements. diff --git a/cechmate/__init__.py b/cechmate/__init__.py index 7cfc9d7..79c15bc 100644 --- a/cechmate/__init__.py +++ b/cechmate/__init__.py @@ -1,7 +1,7 @@ -"""Cechmate init file.""" - # from .filtrations import * # from .solver import * # from .utils import * -from ._version import __version__ as __version__ +from ._version import __version__ + +__all__ = ["__version__"] diff --git a/cechmate/filtrations/__init__.py b/cechmate/filtrations/__init__.py index b937cfe..9a5ad9b 100644 --- a/cechmate/filtrations/__init__.py +++ b/cechmate/filtrations/__init__.py @@ -1,5 +1,7 @@ -from .alpha import Alpha as Alpha -from .rips import Rips as Rips -from .cech import Cech as Cech -from .extended import Extended as Extended +from .alpha import Alpha +from .cech import Cech +from .rips import Rips +# from .extended import * # from .miniball import get_boundary + +__all__ = ["Alpha", "Cech", "Rips"] diff --git a/cechmate/filtrations/alpha.py b/cechmate/filtrations/alpha.py index 8dd2973..b6b6df5 100644 --- a/cechmate/filtrations/alpha.py +++ b/cechmate/filtrations/alpha.py @@ -1,15 +1,21 @@ +from collections import defaultdict import itertools import time import warnings +import logging import numpy as np from numpy import linalg from scipy import spatial -from .base import BaseFiltration +from cechmate.filtrations.base import BaseFiltration +import gudhi +from numpy.typing import NDArray __all__ = ["Alpha"] +logger = logging.getLogger(__name__) + class Alpha(BaseFiltration): """Construct an Alpha filtration from the given data. @@ -39,7 +45,10 @@ def build(self, X): X: Nxd array Array of N Euclidean vectors in d dimensions """ - + warnings.warn( + "This function is deprecated and will be removed in a future release. Use fit instead.", + DeprecationWarning, + ) if X.shape[0] < X.shape[1]: warnings.warn( "The input point cloud has more columns than rows; " @@ -61,6 +70,10 @@ def build(self, X): "Finished spatial.Delaunay triangulation (Elapsed Time %.3g)" % (time.time() - tic) ) + logger.info( + "Finished spatial.Delaunay triangulation (Elapsed Time %.3g)" + % (time.time() - tic) + ) print("Building alpha filtration...") tic = time.time() @@ -110,6 +123,13 @@ def build(self, X): % (time.time() - tic) ) + logger.info( + "Finished building alpha filtration (Elapsed Time %.3g)" + % (time.time() - tic) + ) + + # NOTE: The following line makes this method have special return type. The 0-simplices + # are indexed on lists, whereas all other simplices are indexed on tuples. simplices = [([i], 0) for i in range(X.shape[0])] simplices.extend(filtration.items()) @@ -117,6 +137,118 @@ def build(self, X): return simplices + def fit(self, X) -> list[tuple[tuple[np.int32], np.float64]]: + """ + Do the Alpha filtration of a Euclidean point set (requires scipy) + + Parameters + =========== + X: Nxd array + Array of N Euclidean vectors in d dimensions + """ + + if X.shape[0] < X.shape[1]: + warnings.warn( + "The input point cloud has more dimensions than data points; " + + "did you mean to transpose?" + ) + maxdim = self.maxdim or X.shape[1] - 1 + + ## Step 1: Figure out the filtration + if self.verbose: + print("Doing spatial.Delaunay triangulation...") + tic = time.time() + + logger.info("Doing spatial.Delaunay triangulation...") + delaunay_faces = spatial.Delaunay(X).simplices + + if self.verbose: + print( + "Finished spatial.Delaunay triangulation (Elapsed Time %.3g)" + % (time.time() - tic) + ) + logger.info( + "Finished spatial.Delaunay triangulation (Elapsed Time %.3g)" + % (time.time() - tic) + ) + print("Building alpha filtration...") + tic = time.time() + + logger.info("Building alpha filtration...") + + filtration = defaultdict(lambda: float("inf")) + circumcenter_cache = {} + + filtration = {(i,): np.float64(0.0) for i in range(X.shape[0])} + + for dim in range(maxdim + 2, 1, -1): + for s in range(delaunay_faces.shape[0]): + simplex = delaunay_faces[s, :] + for sigma in itertools.combinations(simplex, dim): + sigma = tuple(sorted(sigma)) + + if sigma not in filtration: + if sigma not in circumcenter_cache: + circumcenter_cache[sigma] = self._get_circumcenter( + X[sigma, :] + ) + _, rSqr = circumcenter_cache[sigma] + if np.isfinite(rSqr): + filtration[sigma] = rSqr + + if sigma in filtration: + for i in range(dim): # Propagate alpha filtration value + tau = sigma[0:i] + sigma[i + 1 : :] + if tau not in filtration: + if len(tau) > 1: + if tau not in circumcenter_cache: + circumcenter_cache[tau] = ( + self._get_circumcenter(X[tau, :]) + ) + xtau, rtauSqr = circumcenter_cache[tau] + if np.sum((X[sigma[i], :] - xtau) ** 2) < rtauSqr: + filtration[tau] = filtration[sigma] + else: + filtration[tau] = min( + filtration[tau], filtration[sigma] + ) + + # Convert from squared radii to radii + for sigma in filtration: + filtration[sigma] = np.sqrt(filtration[sigma]) + + ## Step 2: Take care of numerical artifacts that may result + ## in simplices with greater filtration values than their co-faces + simplices_bydim = [set([]) for _ in range(maxdim + 2)] + for simplex in filtration.keys(): + simplices_bydim[len(simplex) - 1].add(simplex) + + simplices_bydim = simplices_bydim[2::] + simplices_bydim.reverse() + + for simplices_dim in simplices_bydim: + for sigma in simplices_dim: + for i in range(len(sigma)): + tau = sigma[0:i] + sigma[i + 1 : :] + if filtration[tau] > filtration[sigma]: + filtration[tau] = filtration[sigma] + + if self.verbose: + print( + "Finished building alpha filtration (Elapsed Time %.3g)" + % (time.time() - tic) + ) + logger.info( + "Finished building alpha filtration (Elapsed Time %.3g)" + % (time.time() - tic) + ) + + simplices = list(filtration.items()) + + self.simplices_ = simplices + + return simplices + def _get_circumcenter(self, X): """ Compute the circumcenter and circumradius of a simplex @@ -189,3 +321,30 @@ def minor(A, j): x = x.dot(V.T) + muV return (x, rSqr) return (np.inf, np.inf) # SC2 (Points not in general position) + + def transform(self, simplices=None, ripser_format=True) -> list[NDArray]: + """ + Compute persistent homology. + """ + simplices_ = simplices or self.simplices_ + + simplex_tree = gudhi.SimplexTree() + for simplex, filtration_value in simplices_: + simplex_tree.insert(simplex, filtration_value) + + persistence = simplex_tree.persistence() + + if not ripser_format: + return persistence + + # convert to ripser.py format + ripser_output = [] + for dim, (birth, death) in persistence: + while len(ripser_output) <= dim: + ripser_output.append([]) + if death == float("inf"): + death = -1 + ripser_output[dim].append(np.array([birth, death])) + ripser_output = [np.array(dgm) for dgm in ripser_output] + + return ripser_output diff --git a/cechmate/filtrations/base.py b/cechmate/filtrations/base.py index f471b66..74ae8a2 100644 --- a/cechmate/filtrations/base.py +++ b/cechmate/filtrations/base.py @@ -1,13 +1,25 @@ """All filtrations should have a base interface.""" +from typing import Literal +import warnings +from numpy.typing import NDArray +import numpy as np + # from ..solver import phat_diagrams class BaseFiltration: """Base filtration that implements constructor and `diagrams` method.""" - def __init__(self, maxdim=None, verbose=True): - """Default constructor + def __init__( + self, + maxdim=None, + verbose=True, + solver: Literal["phat", "gudhi", "ripser"] = "gudhi", + ): + """Base filtration class. + + Not all solvers are implemented for all filtrations. See each filtration class for details. Parameters ---------- @@ -16,11 +28,13 @@ def __init__(self, maxdim=None, verbose=True): Maximum dimension of homology to compute verbose: boolean If True, then print logging statements. - + solver: Literal["phat", "gudhi", "ripser"], default="gudhi" + Solver to use for persistent homology. """ self.maxdim = maxdim self.verbose = verbose + self.solver = solver self.simplices_ = None self.diagrams_ = None @@ -42,7 +56,44 @@ def diagrams(self, simplices=None, show_inf=False): the persistence diagram for Hk """ + warnings.warn( + "This function is deprecated and will be removed in a future release. Use transform instead.", + DeprecationWarning, + ) simplices = simplices or self.simplices_ - # self.diagrams_ = phat_diagrams(simplices, show_inf) + # TODO: Update this call. + self.diagrams_ = phat_diagrams(simplices, show_inf) return self.diagrams_ + + def transform(self, simplices=None, ripser_format=True) -> list[NDArray]: + """ + Compute persistent homology. + """ + import gudhi + + simplices_ = simplices or self.simplices_ + + if simplices_ is None: + raise ValueError("No simplices to transform.") + + simplex_tree = gudhi.SimplexTree() + for simplex, filtration_value in simplices_: + simplex_tree.insert(simplex, filtration_value) + + persistence = simplex_tree.persistence() + + if not ripser_format: + return persistence + + # convert to ripser.py format + ripser_output = [] + for dim, (birth, death) in persistence: + while len(ripser_output) <= dim: + ripser_output.append([]) + if death == float("inf"): + death = -1 + ripser_output[dim].append(np.array([birth, death])) + ripser_output = [np.array(dgm) for dgm in ripser_output] + + return ripser_output diff --git a/cechmate/filtrations/cech.py b/cechmate/filtrations/cech.py index ded53e5..c50ceaf 100644 --- a/cechmate/filtrations/cech.py +++ b/cechmate/filtrations/cech.py @@ -1,4 +1,5 @@ import itertools +import warnings import numpy as np from .base import BaseFiltration @@ -35,7 +36,10 @@ def build(self, X): simplices: Cech filtration for the data X """ - + warnings.warn( + "This function is deprecated and will be removed in a future release. Use fit instead.", + DeprecationWarning, + ) N = X.shape[0] xr = np.arange(N) xrl = xr.tolist() @@ -57,3 +61,38 @@ def build(self, X): self.simplices_ = simplices return simplices + + def fit(self, X) -> list[tuple[list[int], int]]: + """Compute the Cech filtration of a Euclidean point set for simplices up to order :code:`self.max_dim`. + + Parameters + =========== + + X: Nxd array + N Euclidean vectors in d dimensions + + Returns + ========== + + simplices: + Cech filtration for the data X + """ + + N = X.shape[0] + xr = np.arange(N) + xrl = xr.tolist() + maxdim = self.maxdim or X.shape[1] - 1 + miniball = miniball_cache(X) + + # start with vertices + simplices = [([i], 0) for i in range(N)] + + # then higher order simplices + for k in range(maxdim + 1): + for idxs in itertools.combinations(xrl, k + 2): + _, r2 = miniball(frozenset(idxs), frozenset([])) + simplices.append((list(idxs), np.sqrt(r2))) + + self.simplices_ = simplices + + return simplices diff --git a/cechmate/filtrations/custom.py b/cechmate/filtrations/custom.py index 5ada807..d0a54d6 100644 --- a/cechmate/filtrations/custom.py +++ b/cechmate/filtrations/custom.py @@ -1,4 +1,6 @@ -from .base import BaseFiltration +import warnings + +from cechmate.filtrations.base import BaseFiltration __all__ = ["Custom"] @@ -17,5 +19,9 @@ def build(self, simplices): List of simplices as pairs of """ + warnings.warn( + "This method is deprecated and will be removed in future versions. Use the `fit` method instead.", + DeprecationWarning, + ) self.simplices_ = simplices diff --git a/cechmate/filtrations/extended.py b/cechmate/filtrations/extended.py index 52ca749..df45813 100644 --- a/cechmate/filtrations/extended.py +++ b/cechmate/filtrations/extended.py @@ -1,5 +1,4 @@ import numpy as np -# import phat from .base import BaseFiltration @@ -10,10 +9,16 @@ class Extended(BaseFiltration): """ - This class computed the extended persistence of a simplicial complex. It requires input as a simplicial complex and a mapping on each vertex in the complex. It returns a dictionary storing the associated diagrams in each homology class. + Extended persistence class. + + This class computes the extended persistence of a simplicial complex. It + requires a simplicial complex as input and a mapping on each vertex in the + complex. It returns a dictionary storing the associated diagrams in each + homology class. The basic steps are to: - - convert an abstract simplicial complex to the correct boundary matrix, using the lower-star up pass and upper-star down pass + - convert an abstract simplicial complex to the correct boundary + matrix, using the lower-star up pass and upper-star down pass - read the reduced boundary matrix into birth-death pairs. - partition pairs into respective Ordinary/Extended/Relative diagrams. @@ -131,7 +136,8 @@ def diagrams(self): return self.diagrams_ def _process_pairs(self, pairs): - """Split the persistence pairs out into their respective quadrants, adding them to their associated diagrams.""" + """Split the persistence pairs out into their respective quadrants, + adding them to their associated diagrams.""" n = len(self._boundary_matrix) / 2 ordinary_pairs = [(b, d) for (b, d) in pairs if b < n and d < n] extended_pairs = [(b, d) for (b, d) in pairs if b < n and d >= n] diff --git a/cechmate/filtrations/rips.py b/cechmate/filtrations/rips.py index c12bd88..6080202 100644 --- a/cechmate/filtrations/rips.py +++ b/cechmate/filtrations/rips.py @@ -1,4 +1,5 @@ import itertools +import warnings import numpy as np from .base import BaseFiltration @@ -21,6 +22,41 @@ class Rips(BaseFiltration): def build(self, X): """Compute the rips filtration of a Euclidean point set. + Parameters + =========== + X: An Nxd array + An Nxd array of N Euclidean vectors in d dimensions. + """ + warnings.warn( + "This function is deprecated and will be removed in a future release. Use fit instead.", + DeprecationWarning, + ) + D = self._getSSM(X) + N = D.shape[0] + xr = np.arange(N) + xrl = xr.tolist() + maxdim = self.maxdim + if not maxdim: + maxdim = 1 + # First add all 0 simplices + simplices = [([i], 0) for i in range(N)] + for k in range(maxdim + 1): + # Add all (k+1)-simplices, which have (k+2) vertices + for idxs in itertools.combinations(xrl, k + 2): + idxs = list(idxs) + d = 0.0 + for i in range(len(idxs)): + for j in range(i + 1, len(idxs)): + d = max(d, D[idxs[i], idxs[j]]) + simplices.append((idxs, d)) + + self.simplices_ = simplices + + return simplices + + def fit(self, X): + """Compute the rips filtration of a Euclidean point set. + Parameters =========== X: An Nxd array diff --git a/docs/conf.py b/docs/conf.py index c49282d..f6a27f4 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -10,6 +10,8 @@ copyright = "2019, Chris Tralie and Nathaniel Saul" author = "Chris Tralie and Nathaniel Saul" +language = "en" + version = __version__ release = __version__ diff --git a/pyproject.toml b/pyproject.toml index 2ecb9f8..bc30f40 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,8 @@ keywords = [ [project.optional-dependencies] +gudhi = ["gudhi"] + testing = ['kmapper', 'mock', 'networkx', 'pytest', 'pytest-cov'] docs = ['sktda_docs_config', "jupyter", "persim", "tadasets"] diff --git a/test/test_alpha.py b/test/test_alpha.py index 194623d..4b25375 100644 --- a/test/test_alpha.py +++ b/test/test_alpha.py @@ -1,16 +1,16 @@ import pytest import numpy as np -from cechmate import Alpha +from cechmate.filtrations import Alpha @pytest.fixture def triangle(): x = np.array( [ - [0, 0.0], - [1, 1.0], - [0, 1.0], + [0.0, 0.0], + [1.0, 1.0], + [0.0, 1.0], ] ) @@ -19,7 +19,7 @@ def triangle(): def test_triangle(triangle): """Expect 3 vertices, 3 edges, and a triangle""" - a = Alpha(2).build(triangle) + a = Alpha(maxdim=2).fit(triangle) assert len(a) == 7 @@ -687,6 +687,30 @@ def test_precision(): ) X = np.reshape(X, (int(X.size / 3), 3)) alpha = Alpha() - alpha_filtration = alpha.build(X) - dgms = alpha.diagrams(alpha_filtration) + alpha_filtration_build = alpha.build(X) + dgms = alpha.transform(alpha_filtration_build) assert len(dgms) == 3 + assert len([s for s in alpha_filtration_build if len(s[0]) == 1]) == X.shape[0] + + +def test_backwards_compatibility(triangle): + """Test new fit function aligns with old build function.""" + old = Alpha(maxdim=2).build(triangle) + new = Alpha(maxdim=2).fit(triangle) + assert len(old) == len(new) + + old_vertices = [s for s in old if len(s[0]) == 1] + new_vertices = [s for s in new if len(s[0]) == 1] + for sigma, filt in old_vertices: + assert filt == 0 + assert (tuple(sigma), np.float64(filt)) in new_vertices + old_edges = [s for s in old if len(s[0]) == 2] + new_edges = [s for s in new if len(s[0]) == 2] + assert old_edges == new_edges + old_triangles = [s for s in old if len(s[0]) == 3] + new_triangles = [s for s in new if len(s[0]) == 3] + assert old_triangles == new_triangles + + assert len(old_vertices) == 3 + assert len(old_edges) == 3 + assert len(old_triangles) == 1 diff --git a/test/test_cech.py b/test/test_cech.py index 5499126..31c2c78 100644 --- a/test/test_cech.py +++ b/test/test_cech.py @@ -1,32 +1,60 @@ import pytest import numpy as np -from cechmate import Cech +from cechmate.filtrations import Cech @pytest.fixture -def triangle(): +def equilateral_triangle(): + """Define an equilateral triangle to see importance of Cech.""" x = np.array( [ - [0, 0.0], - [1, 1.0], - [0, 1.0], + [0.0, 0.0, 1.0], + [0.0, 1.0, 0.0], + [1.0, 0.0, 0.0], ] ) return x -def test_triangle(triangle): - """Expect 3 vertices, 3 edges, and a triangle""" - c = Cech(2).build(triangle) +def test_triangle(equilateral_triangle): + """Expect 3 vertices, 3 edges, and a triangle.""" + c = Cech(maxdim=2) + c_simplices = c.fit(equilateral_triangle) - assert len(c) == 7 + assert len(c_simplices) == 7 - vertices = [s for s in c if len(s[0]) == 1] - edges = [s for s in c if len(s[0]) == 2] - triangles = [s for s in c if len(s[0]) == 3] + vertices = [s for s in c_simplices if len(s[0]) == 1] + edges = [s for s in c_simplices if len(s[0]) == 2] + triangles = [s for s in c_simplices if len(s[0]) == 3] assert len(vertices) == 3 assert len(edges) == 3 assert len(triangles) == 1 + + c_diagrams = c.transform(c_simplices) + assert len(c_diagrams) == 2 + assert len(c_diagrams[0]) == 3 + assert len(c_diagrams[1]) == 1 + + +def test_backwards_compatibility(equilateral_triangle): + """Test new fit function aligns with old build function.""" + old = Cech(maxdim=2).build(equilateral_triangle) + new = Cech(maxdim=2).fit(equilateral_triangle) + assert len(old) == len(new) + + old_vertices = [s for s in old if len(s[0]) == 1] + new_vertices = [s for s in new if len(s[0]) == 1] + assert old_vertices == new_vertices + old_edges = [s for s in old if len(s[0]) == 2] + new_edges = [s for s in new if len(s[0]) == 2] + assert old_edges == new_edges + old_triangles = [s for s in old if len(s[0]) == 3] + new_triangles = [s for s in new if len(s[0]) == 3] + assert old_triangles == new_triangles + + assert len(old_vertices) == 3 + assert len(old_edges) == 3 + assert len(old_triangles) == 1 diff --git a/test/test_filtrations.py b/test/test_filtrations.py index 37b69b3..ef9129c 100644 --- a/test/test_filtrations.py +++ b/test/test_filtrations.py @@ -35,5 +35,5 @@ def test_alpha(): X = np.random.randn(15, 4) X = X / np.sqrt(np.sum(X**2, 1)[:, None]) tic = time.time() - Alpha().build(X) - time.time() - tic + diagrams = Alpha().fit(X) + phattime = time.time() - tic diff --git a/test/test_miniball.py b/test/test_miniball.py index d77f259..30b5d50 100644 --- a/test/test_miniball.py +++ b/test/test_miniball.py @@ -29,11 +29,13 @@ from mock import patch import numpy as np +import pytest import cechmate from cechmate.filtrations.miniball import miniball_cache, miniball +@pytest.mark.skip(reason="This test will be removed when we include miniball as a dep") @patch("cechmate.filtrations.miniball.get_boundary") def test_caching(mock_get_boundary): mock_get_boundary.side_effect = cechmate.filtrations.get_boundary diff --git a/test/test_rips.py b/test/test_rips.py index 0148a50..e8391ad 100644 --- a/test/test_rips.py +++ b/test/test_rips.py @@ -1,7 +1,7 @@ import pytest import numpy as np -from cechmate import Rips +from cechmate.filtrations import Rips @pytest.fixture @@ -12,7 +12,7 @@ def two_points(): def test_two_points(two_points): - r = Rips(2).build(two_points) + r = Rips(maxdim=2).fit(two_points) assert len(r) == 3 @@ -24,10 +24,63 @@ def test_two_points(two_points): def test_correct_edge_length(two_points): - r = Rips(2).build(two_points) + r = Rips(maxdim=2).fit(two_points) vertices = [s for s in r if len(s[0]) == 1] edges = [s for s in r if len(s[0]) == 2] assert vertices[0][1] == 0.0 assert edges[0][1] == np.sqrt(2) + + +@pytest.fixture +def equilateral_triangle(): + """Define an equilateral triangle to see the difference between Cech and Rips.""" + x = np.array( + [ + [0.0, 0.0, 1.0], + [0.0, 1.0, 0.0], + [1.0, 0.0, 0.0], + ] + ) + + return x + + +def test_triangle(equilateral_triangle): + """Expect 3 vertices, 3 edges, and a triangle.""" + r = Rips(maxdim=2) + r_simplices = r.fit(equilateral_triangle) + + assert len(r_simplices) == 7 + + vertices = [s for s in r_simplices if len(s[0]) == 1] + edges = [s for s in r_simplices if len(s[0]) == 2] + triangles = [s for s in r_simplices if len(s[0]) == 3] + + assert len(vertices) == 3 + assert len(edges) == 3 + assert len(triangles) == 1 + + r_diagrams = r.transform(r_simplices) + assert len(r_diagrams) == 1 + assert len(r_diagrams[0]) == 3 + + +def test_backwards_compatibility(equilateral_triangle): + """Ensure old API agrees with new API.""" + r = Rips(maxdim=2) + r_new = r.fit(equilateral_triangle) + r_old = r.build(equilateral_triangle) + + assert len(r_new) == len(r_old) + + old_vertices = [s for s in r_old if len(s[0]) == 1] + new_vertices = [s for s in r_new if len(s[0]) == 1] + assert old_vertices == new_vertices + old_edges = [s for s in r_old if len(s[0]) == 2] + new_edges = [s for s in r_new if len(s[0]) == 2] + assert old_edges == new_edges + old_triangles = [s for s in r_old if len(s[0]) == 3] + new_triangles = [s for s in r_new if len(s[0]) == 3] + assert old_triangles == new_triangles