Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added clusters_from_cover to kmapper. #213

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions kmapper/kmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,29 @@ def data_from_cluster_id(self, cluster_id, graph, data):
else:
return np.array([])

def clusters_from_cover(self, cube_ids, graph):
    """Return the clusters that belong to the given hypercubes of the cover.

    A node of ``graph["nodes"]`` is selected when its ID starts with
    ``"cube<i>_"`` for one of the requested cube indices ``i``. The
    trailing underscore is part of the prefix, so asking for cube ``1``
    does not also select the nodes of cube ``10``.

    Parameters
    ----------
    cube_ids : list of int
        List of hypercube indices.
    graph : dict
        The resulting dictionary after applying map().

    Returns
    -------
    clusters : dict
        Cluster membership indexed by cluster ID (subset of
        `graph["nodes"]`). The values are the very objects stored in
        `graph["nodes"]`, not copies. The dict is empty when `cube_ids`
        is empty or when no node ID matches.

    """
    # str.startswith accepts a tuple of candidate prefixes, so a single
    # call per node ID checks every requested cube at once. An empty tuple
    # never matches, which yields an empty result for empty `cube_ids`.
    cluster_id_prefixes = tuple("cube{}_".format(i) for i in cube_ids)
    return {
        cluster_id: cluster_members
        for cluster_id, cluster_members in graph["nodes"].items()
        if cluster_id.startswith(cluster_id_prefixes)
    }

def _process_projection_tuple(self, projection):
# Detect if projection is a tuple (for prediction functions)
# TODO: multi-label models
Expand Down
34 changes: 34 additions & 0 deletions test/test_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,24 @@ def test_wrong_id(self):
mems = mapper.data_from_cluster_id("new node", graph, data)
np.testing.assert_array_equal(mems, np.array([]))

def test_clusters_from_cover(self):
    """Clusters selected through the cover must mirror ``graph["nodes"]``.

    The cube ids come from ``cover.find`` applied to the first data row;
    at least one cluster is expected, and each returned member array must
    equal the entry stored in the graph under the same cluster ID.
    """
    km = KeplerMapper(verbose=1)
    points = np.random.rand(100, 2)
    result = km.map(points)

    # Cube ids reported by the fitted cover for the first sample.
    selected_cubes = km.cover.find(points[0])
    selected = km.clusters_from_cover(selected_cubes, result)

    assert len(selected) > 0
    all_nodes = result["nodes"]
    for node_id in selected:
        np.testing.assert_array_equal(selected[node_id], all_nodes[node_id])

def test_no_clusters_from_cover(self):
    """A cube id with no matching node (999 here) gives an empty result."""
    km = KeplerMapper(verbose=1)
    result = km.map(np.random.rand(100, 2))

    selected = km.clusters_from_cover([999], result)
    assert len(selected) == 0

class TestMap:
def test_simplices(self):
Expand All @@ -95,6 +113,22 @@ def test_simplices(self):
assert len(nodes) == 3
assert len(edges) == 3

def test_nodes(self):
    """map() with a 3-cube cover must yield three ``cube<i>_cluster0`` nodes.

    The node IDs are checked in the iteration order of ``graph["nodes"]``,
    so the i-th node is expected to be named ``cube<i>_cluster0``.
    """
    km = KeplerMapper()
    samples = np.random.rand(100, 2)
    projected = km.fit_transform(samples)

    result = km.map(
        projected,
        X=samples,
        cover=Cover(n_cubes=3, perc_overlap=0.75),
        clusterer=cluster.DBSCAN(metric="euclidean", min_samples=3),
    )

    node_ids = list(result["nodes"])
    assert len(node_ids) == 3
    for position, node_id in enumerate(node_ids):
        # verify cluster ID format
        assert node_id == "cube{}_cluster0".format(position)

def test_precomputed(self):
mapper = KeplerMapper()

Expand Down