Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Local (aspatial) Correlation #89

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion esda/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from .gamma import Gamma
from .util import fdr
from .smaup import Smaup
from .lee import Spatial_Pearson, Local_Spatial_Pearson
from .lee import Spatial_Pearson, Spatial_Pearson_Local
from .silhouettes import (path_silhouette, boundary_silhouette,
silhouette_alist, nearest_label)
from . import adbscan
104 changes: 103 additions & 1 deletion esda/lee.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,103 @@
from itertools import chain
from warnings import warn

from sklearn import preprocessing
from sklearn import utils
from sklearn.base import BaseEstimator

try:
    from joblib import Parallel, delayed
    _HAS_JOBLIB = True
except ModuleNotFoundError:
    _HAS_JOBLIB = False

class Pearson_Local(BaseEstimator):
    """Site-wise decomposition of Pearson's R.

    Splits the Pearson correlation into its additive per-site components,
    so that summing the local components over sites recovers the global
    statistic. Permutation-based inference yields per-site pseudo p-values.

    Parameters
    ----------
    conditional_inference : bool or str (default: False)
        How the null distribution is simulated:
        - False : permute x and y independently
        - 'x'   : hold x fixed, permute y
        - 'y'   : permute x, hold y fixed
        - True  : half of the replications hold x fixed, half hold y fixed
    permutations : int (default: 999)
        Number of permutations used for inference. ``None`` or any value
        below 1 disables inference.
    n_jobs : int (default: -1)
        Number of joblib workers for the simulations; -1 uses all cores.
        Falls back to serial execution (with a warning) when joblib is
        not installed.
    """

    def __init__(self, conditional_inference=False, permutations=999, n_jobs=-1):
        # BUG FIX: default was the invalid literal ``999i`` (SyntaxError).
        self.permutations = permutations
        self.conditional_inference = conditional_inference
        # BUG FIX: the original ignored the argument and hard-coded -1.
        self.n_jobs = n_jobs

    def fit(self, x, y=None):
        """Fit the local Pearson decomposition.

        Parameters
        ----------
        x : array of shape (n,) or (n, p)
            A single variable when ``y`` is given; otherwise a matrix
            with at least two columns.
        y : array of shape (n,), optional
            Second variable of the pair.

        Returns
        -------
        self, with ``associations_``, ``significance_`` and
        ``reference_distribution_`` set.
        """
        if y is not None:
            x, y = utils.check_X_y(x, y, ensure_2d=False, estimator=self)
            X = numpy.column_stack((x, y))
        else:
            X = utils.check_array(x, ensure_2d=True, ensure_min_features=2,
                                  estimator=self)
        n, p = X.shape
        Z = preprocessing.StandardScaler().fit_transform(X)
        self.associations_ = self._statistic(Z)

        if y is not None:
            # BUG FIX: attribute was misspelled ``self.associations``.
            # Keep only the (x, y) pair's local components, shape (n,).
            self.associations_ = self.associations_[0, 1]

        if (self.permutations is None) or (self.permutations < 1):
            self.reference_distribution_ = None
            self.significance_ = numpy.nan
            return self

        # BUG FIX: attribute was misspelled ``self.conditinal_inference``;
        # the final ``else`` below already covers the False case, so no
        # outer guard is needed (the original left ``permuter`` undefined
        # when conditional_inference was False).
        if self.conditional_inference == 'x':
            # condition on x: keep x in place, permute y
            permuter = ((numpy.arange(n), numpy.random.permutation(n))
                        for _ in range(self.permutations))
        elif self.conditional_inference == 'y':
            # condition on y: permute x, keep y in place
            permuter = ((numpy.random.permutation(n), numpy.arange(n))
                        for _ in range(self.permutations))
        elif self.conditional_inference is True:
            # split replications between the two conditional designs
            permute_x = ((numpy.random.permutation(n), numpy.arange(n))
                         for _ in range(self.permutations // 2))
            permute_y = ((numpy.arange(n), numpy.random.permutation(n))
                         for _ in range(self.permutations // 2))
            permuter = chain(permute_x, permute_y)
        else:
            # unconditional: permute both variables independently
            permuter = ((numpy.random.permutation(n), numpy.random.permutation(n))
                        for _ in range(self.permutations))

        n_jobs = self.n_jobs
        # BUG FIX: flag was misspelled ``HAS_JOBLIB``; also, the serial
        # path must be taken whenever n_jobs == 1 (the original crashed
        # on undefined ``Parallel`` when joblib was absent and n_jobs == 1,
        # and never ran serially when joblib was present).
        if not _HAS_JOBLIB and (n_jobs != 1):
            warn('The joblib package is required to run parallel'
                 ' simulations for this model. Please install '
                 ' joblib to enable parallel processing for simulations.')
            n_jobs = 1
        # BUG FIX: rows (sites) are permuted, so index as Z[rx, 0] and
        # Z[ry, 1]; the original ``Z[0, rx]`` indexed the p columns with a
        # length-n permutation. Each replicate keeps only its (x, y)
        # component so its shape (n,) matches ``associations_``.
        if n_jobs == 1:
            simulations = [self._statistic(numpy.column_stack((Z[rx, 0],
                                                               Z[ry, 1])))[0, 1]
                           for rx, ry in permuter]
        else:
            simulations = Parallel(n_jobs=n_jobs)(
                delayed(self._statistic)(numpy.column_stack((Z[rx, 0],
                                                             Z[ry, 1])))
                for rx, ry in permuter
                )
            simulations = [simulation[0, 1] for simulation in simulations]

        if self.conditional_inference is True:
            # Post-hoc check that permuting x and permuting y produce
            # exchangeable replications (compared on the Fisher z scale).
            rz = numpy.arctanh(simulations)
            firsthalf, secondhalf = numpy.array_split(rz, 2)
            from scipy.stats import ttest_rel
            post_hoc_test = ttest_rel(firsthalf, secondhalf)
            # ttest_rel is applied site-wise, so summarize with the
            # smallest p-value across sites.
            p_value = numpy.min(numpy.atleast_1d(post_hoc_test.pvalue))
            if p_value < .01:
                # BUG FIX: the message referenced {p} but was never formatted.
                warn('The null hypothesis that permutations of X yield equivalent'
                     ' correlations to permutations of y is very unlikely given'
                     ' the permutations conducted (p = {p}). It is very likely'
                     ' that any inference based on the conditional permutation'
                     ' will be incorrect. Use conditional_inference=False in'
                     ' this case.'.format(p=p_value))

        # Two-sided pseudo p-values from the permutation distribution.
        # BUG FIX: use the realized replication count rather than
        # ``self.permutations`` (they differ for odd counts when
        # conditional_inference is True, since each half gets perms // 2).
        n_simulations = len(simulations)
        self.reference_distribution_ = numpy.row_stack(simulations).T  # (n, sims)
        # When no y was supplied, ``associations_`` is the full (p, p, n)
        # stack; the permutation design only covers the first two columns,
        # so compare against that pair's components.
        pairwise = (self.associations_ if y is not None
                    else self.associations_[0, 1])
        above = self.reference_distribution_ >= pairwise.reshape(-1, 1)
        larger = above.sum(axis=1)
        extreme = numpy.minimum(larger, n_simulations - larger)
        self.significance_ = (extreme + 1.) / (n_simulations + 1.)
        self.reference_distribution_ = self.reference_distribution_.T  # (sims, n)
        return self

    @staticmethod
    def _statistic(Z):
        """Per-site correlation components of standardized data ``Z``.

        Returns an array of shape (p, p, n) whose ``[i, j]`` entry holds
        the n site-wise contributions to the correlation of variables i
        and j; summing over the last axis recovers the correlation matrix.
        """
        # BUG FIX: referenced undefined ``X``; also the result is shaped
        # (p, p, n), not (n, p, p) as the original comment claimed.
        n, p = Z.shape
        return (Z.T * Z.T[:, None]) / n


class Spatial_Pearson(BaseEstimator):
"""Global Spatial Pearson Statistic"""
Expand Down Expand Up @@ -72,8 +169,12 @@ def fit(self, x, y):
self.connectivity.sum(axis=1))

if (self.permutations is None):
self.reference_distribution_ = None
self.significance_ = numpy.nan
return self
elif self.permutations < 1:
self.reference_distribution_ = None
self.significance_ = numpy.nan
return self

if self.permutations:
Expand All @@ -93,7 +194,7 @@ def _statistic(Z,W):
return (Z.T @ ctc @ Z) / (ones.T @ ctc @ ones)


class Local_Spatial_Pearson(BaseEstimator):
class Spatial_Pearson_Local(BaseEstimator):
"""Local Spatial Pearson Statistic"""

def __init__(self, connectivity=None, permutations=999):
Expand Down Expand Up @@ -218,6 +319,7 @@ def fit(self, x, y):
self.reference_distribution_ = self.reference_distribution_.T
else:
self.reference_distribution_ = None
self.significance_ = numpy.nan
return self

@staticmethod
Expand Down
84 changes: 84 additions & 0 deletions tests/test_lee.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import unittest
import libpysal
import geopandas
from libpysal.common import pandas, RTOL, ATOL
from .. import lee
import numpy


PANDAS_EXTINCT = pandas is None

class Lee_Tester(unittest.TestCase):
    """Regression tests for the Lee (2001) spatial Pearson statistics.

    Uses the classic columbus dataset shipped with libpysal, with a
    row-standardized Queen contiguity weights matrix, correlating
    housing value (HOVAL) against crime (CRIME).
    """

    def setUp(self):
        # Columbus, OH neighborhood data bundled with libpysal examples.
        self.data = geopandas.read_file(libpysal.examples.get_path("columbus.shp"))
        self.w = libpysal.weights.Queen.from_dataframe(self.data)
        # row-standardize so each row of weights sums to 1
        self.w.transform = 'r'
        self.x = self.data[['HOVAL']].values
        self.y = self.data[['CRIME']].values

    def test_global(self):
        """Global Spatial_Pearson: association matrix, replicates, significance."""
        # fixed seed so the permutation replicates are reproducible
        numpy.random.seed(2478879)
        result = lee.Spatial_Pearson(connectivity=self.w.sparse).fit(self.x, self.y)
        # expected 2x2 spatial association matrix for (HOVAL, CRIME)
        known = numpy.array([[ 0.30136527, -0.23625603],
                             [-0.23625603, 0.53512008]])
        # NOTE(review): attribute is ``association_`` here but
        # ``associations_`` in test_local — confirm the estimator API
        # naming is intentional.
        numpy.testing.assert_allclose(known,result.association_, rtol=RTOL, atol=ATOL)
        # default 999 permutations, each replicate a 2x2 matrix
        numpy.testing.assert_array_equal(result.reference_distribution_.shape, (999,2,2))
        # first two permutation replicates under the fixed seed
        first_rep = numpy.array([[ 0.22803705, -0.08053692],
                                 [-0.08053692, 0.18897318]])

        second_rep = numpy.array([[ 0.14179274, -0.06962692],
                                  [-0.06962692, 0.13688337]])
        numpy.testing.assert_allclose(first_rep, result.reference_distribution_[0],
                                      rtol=RTOL, atol=ATOL)
        numpy.testing.assert_allclose(second_rep, result.reference_distribution_[1],
                                      rtol=RTOL, atol=ATOL)

        # pseudo p-values for each cell of the association matrix
        known_significance = numpy.array([[0.125, 0.026],
                                          [0.026, 0.001]])
        numpy.testing.assert_allclose(known_significance, result.significance_,
                                      rtol=RTOL, atol=ATOL)

    def test_local(self):
        """Local Spatial_Pearson_Local: one association and p-value per site."""
        # fixed seed so the permutation replicates are reproducible
        numpy.random.seed(2478879)
        result = lee.Spatial_Pearson_Local(connectivity=self.w.sparse).fit(self.x, self.y)
        # expected site-wise association components (one per observation)
        known_locals = numpy.array([ 0.10246023, -0.24169198, -0.1308714 ,
                                     0.00895543, -0.16080899, -0.00950808,
                                    -0.14615398, -0.0627634 , 0.00661232,
                                    -0.42354628, -0.73121006, 0.02060548,
                                     0.05187356, 0.06515283, -0.64400723,
                                    -0.37489818, -2.06573667, -0.10931854,
                                     0.50823848, -0.06338637, -0.10559429,
                                     0.03282849, -0.86618915, -0.62333825,
                                    -0.40910044,-0.41866868, -0.00702983,
                                    -0.4246288 , -0.52142507, -0.22481772,
                                     0.1931263 , -1.39355214, 0.02036755,
                                     0.22896308, -0.00240854, -0.30405211,
                                    -0.66950406, -0.21481868, -0.60320158,
                                    -0.38117303, -0.45584563, 0.32019362,
                                    -0.02818729, -0.02214172, 0.05587915,
                                     0.0295999 , -0.78818135, 0.16854472,
                                     0.2378127 ])
        numpy.testing.assert_allclose(known_locals, result.associations_,
                                      rtol=RTOL, atol=ATOL)
        # site-wise pseudo p-values under the fixed seed
        significances = numpy.array([0.154, 0.291, 0.358, 0.231, 0.146,
                                     0.335, 0.325, 0.388, 0.244, 0.111,
                                     0.019, 0.165, 0.136, 0.073, 0.014,
                                     0.029, 0.002, 0.376, 0.003, 0.265,
                                     0.449, 0.121, 0.072, 0.006, 0.036,
                                     0.06 , 0.355, 0.01 , 0.017, 0.168,
                                     0.022, 0.003, 0.217, 0.016, 0.337,
                                     0.137, 0.015, 0.128, 0.11 , 0.09 ,
                                     0.168, 0.031, 0.457, 0.44 , 0.141,
                                     0.249, 0.158, 0.018, 0.031])
        numpy.testing.assert_allclose(significances, result.significance_,
                                      rtol=RTOL, atol=ATOL)

# Assemble all test cases in this module into a single suite.
suite = unittest.TestSuite()
loader = unittest.TestLoader()
for test_case in (Lee_Tester,):
    suite.addTest(loader.loadTestsFromTestCase(test_case))

if __name__ == '__main__':
    unittest.TextTestRunner().run(suite)