Skip to content

Commit

Permalink
Merge pull request #16 from ArneDefauw/main
Browse files Browse the repository at this point in the history
z_score normalization before consensus clustering
  • Loading branch information
ArneDefauw authored Jan 14, 2025
2 parents a1f3ad8 + 59ac0dd commit 1428150
Showing 1 changed file with 23 additions and 1 deletion.
24 changes: 23 additions & 1 deletion src/flowsom/models/consensus_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from itertools import combinations

import numpy as np
from scipy.stats import zscore
from sklearn.cluster import AgglomerativeClustering

from . import BaseClusterEstimator
Expand All @@ -34,7 +35,15 @@ class ConsensusCluster(BaseClusterEstimator):
"""

def __init__(
self, n_clusters, K=None, H=100, resample_proportion=0.9, linkage="average", cluster=AgglomerativeClustering
self,
n_clusters,
K=None,
H=100,
resample_proportion=0.9,
linkage="average",
z_score=False,
z_cap=3, # ignored if z_score is False
cluster=AgglomerativeClustering,
):
super().__init__()
assert 0 <= resample_proportion <= 1, "proportion has to be between 0 and 1"
Expand All @@ -44,6 +53,9 @@ def __init__(
self.resample_proportion = resample_proportion
self.cluster = cluster
self.linkage = linkage
self.z_score = z_score
assert z_cap > 0, f"z_cap should be stricly positive, but got {z_cap}"
self.z_cap = z_cap

def _internal_resample(self, data, proportion):
"""Resamples the data.
Expand All @@ -62,6 +74,9 @@ def fit(self, data):
Args:
* data -> (examples,attributes) format
"""
# zscore and clip
if self.z_score:
data = self._z_score(data)
Mk = np.zeros((data.shape[0], data.shape[0]))
Is = np.zeros((data.shape[0],) * 2)
for _ in range(self.H):
Expand Down Expand Up @@ -89,4 +104,11 @@ def fit(self, data):

def fit_predict(self, data):
    """Cluster ``data`` into ``self.n_clusters`` groups and return the labels.

    When ``self.z_score`` is truthy the data is first passed through
    ``self._z_score``; otherwise it is used as given. A new estimator is
    built from ``self.cluster`` with the configured ``n_clusters`` and
    ``linkage``, and the result of its ``fit_predict`` is returned.

    Args:
    * data -> (examples,attributes) format
    """
    features = self._z_score(data) if self.z_score else data
    estimator = self.cluster(n_clusters=self.n_clusters, linkage=self.linkage)
    return estimator.fit_predict(features)

def _z_score(self, data):
    """Standardise each column of ``data`` and clip to ``[-z_cap, z_cap]``.

    Every attribute (column, axis 0) is z-scored with ``scipy.stats.zscore``
    and the result is clipped symmetrically at ``self.z_cap``.

    A column whose values are all identical has zero standard deviation, for
    which ``zscore`` yields NaN. Such a column carries no information for
    clustering, so it is mapped to 0.0 instead of letting NaN reach the
    clustering estimator. NaNs already present in the input are not masked.

    Args:
    * data -> (examples,attributes) format

    Returns:
    * float array of the same shape as ``data``
    """
    data = np.asarray(data)
    scaled = zscore(data, axis=0)
    # A column is constant when every row equals the first row; NaN entries
    # never compare equal, so columns containing NaN are left untouched.
    constant = (data == data[:1]).all(axis=0, keepdims=True)
    scaled = np.where(constant, 0.0, scaled)
    return np.clip(scaled, -self.z_cap, self.z_cap)

0 comments on commit 1428150

Please sign in to comment.