11from collections import namedtuple , deque
22from operator import attrgetter
33from itertools import chain , count
4+ from distutils .version import LooseVersion as _LooseVersion
5+
46import heapq
57import numpy
68
79import scipy .cluster .hierarchy
10+ import scipy .spatial .distance
11+
812from Orange .distance import Euclidean , PearsonR
913
1014__all__ = ['HierarchicalClustering' ]
1519WEIGHTED = "weighted"
1620WARD = "ward"
1721
22+ # Does scipy implement an O(n**2) NN chain algorithm?
23+ _HAS_NN_CHAIN = hasattr (scipy .cluster .hierarchy , "_hierarchy" ) and \
24+ hasattr (scipy .cluster .hierarchy ._hierarchy , "nn_chain" )
25+
26+ # Prior to 0.18, the Python interface of scipy.cluster.hierarchy disallowed
27+ # Ward clustering from a precomputed distance matrix even though its Cython
28+ # implementation allowed it and was documented to support it (scipy issue 5220)
29+ _HAS_WARD_LINKAGE_FROM_DIST = \
30+ _LooseVersion (scipy .__version__ ) >= _LooseVersion ("0.18" ) and \
31+ _HAS_NN_CHAIN
32+
1833
1934def condensedform (X , mode = "upper" ):
2035 X = numpy .asarray (X )
@@ -86,7 +101,23 @@ def dist_matrix_linkage(matrix, linkage=AVERAGE):
86101 """
87102 # Extract compressed upper triangular distance matrix.
88103 distances = condensedform (matrix )
89- return scipy .cluster .hierarchy .linkage (distances , method = linkage )
104+ if linkage == WARD and not _HAS_WARD_LINKAGE_FROM_DIST :
105+ # Avoid `scipy.cluster.hierarchy.linkage` and dispatch to its
106+ # Cython implementation directly.
107+ # This is the core of scipy.cluster.hierarchy.linkage in
108+ # scipy 0.16 and 0.17. We assume those branches are in bug-fix-only
109+ # mode, so this interface will not change.
110+ y = numpy .asarray (distances , dtype = float )
111+ scipy .spatial .distance .is_valid_y (y , throw = True )
112+ N = scipy .spatial .distance .num_obs_y (y )
113+ # allocate the output linkage matrix
114+ Z = numpy .zeros ((N - 1 , 4 ))
115+ # retrieve the correct method flag
116+ method = scipy .cluster .hierarchy ._cpy_euclid_methods ["ward" ]
117+ scipy .cluster .hierarchy ._hierarchy .linkage (y , Z , int (N ), int (method ))
118+ return Z
119+ else :
120+ return scipy .cluster .hierarchy .linkage (distances , method = linkage )
90121
91122
92123def dist_matrix_clustering (matrix , linkage = AVERAGE ):
@@ -96,9 +127,7 @@ def dist_matrix_clustering(matrix, linkage=AVERAGE):
96127 :param Orange.misc.DistMatrix matrix:
97128 :param str linkage:
98129 """
99- # Extract compressed upper triangular distance matrix.
100- distances = condensedform (matrix )
101- Z = scipy .cluster .hierarchy .linkage (distances , method = linkage )
130+ Z = dist_matrix_linkage (matrix , linkage = linkage )
102131 return tree_from_linkage (Z )
103132
104133
@@ -363,12 +392,11 @@ def item(node):
363392 heap = [item (tree )]
364393
365394 while len (heap ) < k :
366- key , cl = heapq . heappop ( heap )
395+ _ , cl = heap [ 0 ] # peek
367396 if cl .is_leaf :
368397 assert all (n .is_leaf for _ , n in heap )
369- heapq .heappush (heap , (key , cl ))
370398 break
371-
399+ key , cl = heapq . heappop ( heap )
372400 left , right = cl .left , cl .right
373401 heapq .heappush (heap , item (left ))
374402 heapq .heappush (heap , item (right ))
0 commit comments