scikit-tda · pulquero · Feb 13, 2021
diff --git a/kmapper/kmapper.py b/kmapper/kmapper.py
@@ -827,6 +827,29 @@ def data_from_cluster_id(self, cluster_id, graph, data):
         else:
             return np.array([])
 
+    def clusters_from_cover(self, cube_ids, graph):
+        """Select the clusters of `graph` that belong to the given hypercubes.
+
+        Parameters
+        ----------
+        cube_ids : list of int
+            Hypercube indices whose clusters should be returned.
+        graph : dict
+            The resulting dictionary after applying map(); its "nodes" entry is read.
+
+        Returns
+        -------
+        clusters : dict
+            Entries of `graph["nodes"]` whose cluster ID starts with "cube<i>_".
+        """
+        # The trailing underscore keeps cube 1 from also matching cube 10.
+        prefixes = tuple("cube{!s}_".format(idx) for idx in cube_ids)
+        return {
+            node_id: members
+            for node_id, members in graph["nodes"].items()
+            if node_id.startswith(prefixes)
+        }
+
     def _process_projection_tuple(self, projection):
         # Detect if projection is a tuple (for prediction functions)
         # TODO: multi-label models

diff --git a/test/test_mapper.py b/test/test_mapper.py
@@ -75,6 +75,24 @@ def test_wrong_id(self):
         mems = mapper.data_from_cluster_id("new node", graph, data)
         np.testing.assert_array_equal(mems, np.array([]))
 
+    def test_clusters_from_cover(self):
+        # Clusters selected via the cubes found for a sample must mirror graph["nodes"].
+        kepler = KeplerMapper(verbose=1)
+        points = np.random.rand(100, 2)
+        graph = kepler.map(points)
+
+        selected = kepler.clusters_from_cover(kepler.cover.find(points[0]), graph)
+        assert len(selected) > 0
+        for node_id in selected:
+            np.testing.assert_array_equal(selected[node_id], graph["nodes"][node_id])
+
+    def test_no_clusters_from_cover(self):
+        # Cube 999 is expected to be absent from the graph, so nothing is selected.
+        kepler = KeplerMapper(verbose=1)
+        graph = kepler.map(np.random.rand(100, 2))
+
+        selected = kepler.clusters_from_cover([999], graph)
+        assert len(selected) == 0
 
 class TestMap:
     def test_simplices(self):
@@ -95,6 +113,22 @@ def test_simplices(self):
         assert len(nodes) == 3
         assert len(edges) == 3
 
+    def test_nodes(self):
+        # The graph is expected to hold exactly three nodes, named cube<i>_cluster0.
+        kepler = KeplerMapper()
+        X = np.random.rand(100, 2)
+        projected = kepler.fit_transform(X)
+
+        cover = Cover(n_cubes=3, perc_overlap=0.75)
+        clusterer = cluster.DBSCAN(metric="euclidean", min_samples=3)
+        graph = kepler.map(projected, X=X, cover=cover, clusterer=clusterer)
+
+        node_ids = list(graph["nodes"])
+        assert len(node_ids) == 3
+        # verify cluster ID format
+        expected_ids = ["cube{}_cluster0".format(n) for n in range(3)]
+        assert node_ids == expected_ids
+
     def test_precomputed(self):
         mapper = KeplerMapper()