Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Local (aspatial) Correlation #89

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion esda/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from .gamma import Gamma
from .util import fdr
from .smaup import Smaup
from .lee import Spatial_Pearson, Local_Spatial_Pearson
from .lee import Spatial_Pearson, Spatial_Pearson_Local
from .silhouettes import (path_silhouette, boundary_silhouette,
silhouette_alist, nearest_label)
from . import adbscan
104 changes: 103 additions & 1 deletion esda/lee.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,103 @@
from itertools import chain
from warnings import warn

from sklearn import preprocessing
from sklearn import utils
from sklearn.base import BaseEstimator

try:
    from joblib import Parallel, delayed
    _HAS_JOBLIB = True
except ModuleNotFoundError:
    _HAS_JOBLIB = False

class Pearson_Local(BaseEstimator):
    """Site-wise decomposition of Pearson's R.

    Splits the Pearson correlation into its additive per-site components,
    so that summing the local components over sites recovers the global
    statistic. Permutation-based inference yields per-site pseudo p-values.

    Parameters
    ----------
    conditional_inference : bool or str (default: False)
        How the null distribution is simulated:
        - False : permute x and y independently
        - 'x'   : hold x fixed, permute y
        - 'y'   : permute x, hold y fixed
        - True  : half of the replications hold x fixed, half hold y fixed
    permutations : int (default: 999)
        Number of permutations used for inference. ``None`` or any value
        below 1 disables inference.
    n_jobs : int (default: -1)
        Number of joblib workers for the simulations; -1 uses all cores.
        Falls back to serial execution (with a warning) when joblib is
        not installed.
    """

    def __init__(self, conditional_inference=False, permutations=999, n_jobs=-1):
        # BUG FIX: default was the invalid literal ``999i`` (SyntaxError).
        self.permutations = permutations
        self.conditional_inference = conditional_inference
        # BUG FIX: the original ignored the argument and hard-coded -1.
        self.n_jobs = n_jobs

    def fit(self, x, y=None):
        """Fit the local Pearson decomposition.

        Parameters
        ----------
        x : array of shape (n,) or (n, p)
            A single variable when ``y`` is given; otherwise a matrix
            with at least two columns.
        y : array of shape (n,), optional
            Second variable of the pair.

        Returns
        -------
        self, with ``associations_``, ``significance_`` and
        ``reference_distribution_`` set.
        """
        if y is not None:
            x, y = utils.check_X_y(x, y, ensure_2d=False, estimator=self)
            X = numpy.column_stack((x, y))
        else:
            X = utils.check_array(x, ensure_2d=True, ensure_min_features=2,
                                  estimator=self)
        n, p = X.shape
        Z = preprocessing.StandardScaler().fit_transform(X)
        self.associations_ = self._statistic(Z)

        if y is not None:
            # BUG FIX: attribute was misspelled ``self.associations``.
            # Keep only the (x, y) pair's local components, shape (n,).
            self.associations_ = self.associations_[0, 1]

        if (self.permutations is None) or (self.permutations < 1):
            self.reference_distribution_ = None
            self.significance_ = numpy.nan
            return self

        # BUG FIX: attribute was misspelled ``self.conditinal_inference``;
        # the final ``else`` below already covers the False case, so no
        # outer guard is needed (the original left ``permuter`` undefined
        # when conditional_inference was False).
        if self.conditional_inference == 'x':
            # condition on x: keep x in place, permute y
            permuter = ((numpy.arange(n), numpy.random.permutation(n))
                        for _ in range(self.permutations))
        elif self.conditional_inference == 'y':
            # condition on y: permute x, keep y in place
            permuter = ((numpy.random.permutation(n), numpy.arange(n))
                        for _ in range(self.permutations))
        elif self.conditional_inference is True:
            # split replications between the two conditional designs
            permute_x = ((numpy.random.permutation(n), numpy.arange(n))
                         for _ in range(self.permutations // 2))
            permute_y = ((numpy.arange(n), numpy.random.permutation(n))
                         for _ in range(self.permutations // 2))
            permuter = chain(permute_x, permute_y)
        else:
            # unconditional: permute both variables independently
            permuter = ((numpy.random.permutation(n), numpy.random.permutation(n))
                        for _ in range(self.permutations))

        n_jobs = self.n_jobs
        # BUG FIX: flag was misspelled ``HAS_JOBLIB``; also, the serial
        # path must be taken whenever n_jobs == 1 (the original crashed
        # on undefined ``Parallel`` when joblib was absent and n_jobs == 1,
        # and never ran serially when joblib was present).
        if not _HAS_JOBLIB and (n_jobs != 1):
            warn('The joblib package is required to run parallel'
                 ' simulations for this model. Please install '
                 ' joblib to enable parallel processing for simulations.')
            n_jobs = 1
        # BUG FIX: rows (sites) are permuted, so index as Z[rx, 0] and
        # Z[ry, 1]; the original ``Z[0, rx]`` indexed the p columns with a
        # length-n permutation. Each replicate keeps only its (x, y)
        # component so its shape (n,) matches ``associations_``.
        if n_jobs == 1:
            simulations = [self._statistic(numpy.column_stack((Z[rx, 0],
                                                               Z[ry, 1])))[0, 1]
                           for rx, ry in permuter]
        else:
            simulations = Parallel(n_jobs=n_jobs)(
                delayed(self._statistic)(numpy.column_stack((Z[rx, 0],
                                                             Z[ry, 1])))
                for rx, ry in permuter
                )
            simulations = [simulation[0, 1] for simulation in simulations]

        if self.conditional_inference is True:
            # Post-hoc check that permuting x and permuting y produce
            # exchangeable replications (compared on the Fisher z scale).
            rz = numpy.arctanh(simulations)
            firsthalf, secondhalf = numpy.array_split(rz, 2)
            from scipy.stats import ttest_rel
            post_hoc_test = ttest_rel(firsthalf, secondhalf)
            # ttest_rel is applied site-wise, so summarize with the
            # smallest p-value across sites.
            p_value = numpy.min(numpy.atleast_1d(post_hoc_test.pvalue))
            if p_value < .01:
                # BUG FIX: the message referenced {p} but was never formatted.
                warn('The null hypothesis that permutations of X yield equivalent'
                     ' correlations to permutations of y is very unlikely given'
                     ' the permutations conducted (p = {p}). It is very likely'
                     ' that any inference based on the conditional permutation'
                     ' will be incorrect. Use conditional_inference=False in'
                     ' this case.'.format(p=p_value))

        # Two-sided pseudo p-values from the permutation distribution.
        # BUG FIX: use the realized replication count rather than
        # ``self.permutations`` (they differ for odd counts when
        # conditional_inference is True, since each half gets perms // 2).
        n_simulations = len(simulations)
        self.reference_distribution_ = numpy.row_stack(simulations).T  # (n, sims)
        # When no y was supplied, ``associations_`` is the full (p, p, n)
        # stack; the permutation design only covers the first two columns,
        # so compare against that pair's components.
        pairwise = (self.associations_ if y is not None
                    else self.associations_[0, 1])
        above = self.reference_distribution_ >= pairwise.reshape(-1, 1)
        larger = above.sum(axis=1)
        extreme = numpy.minimum(larger, n_simulations - larger)
        self.significance_ = (extreme + 1.) / (n_simulations + 1.)
        self.reference_distribution_ = self.reference_distribution_.T  # (sims, n)
        return self

    @staticmethod
    def _statistic(Z):
        """Per-site correlation components of standardized data ``Z``.

        Returns an array of shape (p, p, n) whose ``[i, j]`` entry holds
        the n site-wise contributions to the correlation of variables i
        and j; summing over the last axis recovers the correlation matrix.
        """
        # BUG FIX: referenced undefined ``X``; also the result is shaped
        # (p, p, n), not (n, p, p) as the original comment claimed.
        n, p = Z.shape
        return (Z.T * Z.T[:, None]) / n


class Spatial_Pearson(BaseEstimator):
"""Global Spatial Pearson Statistic"""
Expand Down Expand Up @@ -72,8 +169,12 @@ def fit(self, x, y):
self.connectivity.sum(axis=1))

if (self.permutations is None):
self.reference_distribution_ = None
self.significance_ = numpy.nan
return self
elif self.permutations < 1:
self.reference_distribution_ = None
self.significance_ = numpy.nan
return self

if self.permutations:
Expand All @@ -93,7 +194,7 @@ def _statistic(Z,W):
return (Z.T @ ctc @ Z) / (ones.T @ ctc @ ones)


class Local_Spatial_Pearson(BaseEstimator):
class Spatial_Pearson_Local(BaseEstimator):
"""Local Spatial Pearson Statistic"""

def __init__(self, connectivity=None, permutations=999):
Expand Down Expand Up @@ -218,6 +319,7 @@ def fit(self, x, y):
self.reference_distribution_ = self.reference_distribution_.T
else:
self.reference_distribution_ = None
self.significance_ = numpy.nan
return self

@staticmethod
Expand Down
84 changes: 84 additions & 0 deletions tests/test_lee.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import unittest
import libpysal
import geopandas
from libpysal.common import pandas, RTOL, ATOL
from .. import lee
import numpy


PANDAS_EXTINCT = pandas is None

class Lee_Tester(unittest.TestCase):
    """Regression tests for the Lee (2001) spatial Pearson statistics.

    Uses the classic columbus dataset shipped with libpysal, with a
    row-standardized Queen contiguity weights matrix, correlating
    housing value (HOVAL) against crime (CRIME).
    """

    def setUp(self):
        # Columbus, OH neighborhood data bundled with libpysal examples.
        self.data = geopandas.read_file(libpysal.examples.get_path("columbus.shp"))
        self.w = libpysal.weights.Queen.from_dataframe(self.data)
        # row-standardize so each row of weights sums to 1
        self.w.transform = 'r'
        self.x = self.data[['HOVAL']].values
        self.y = self.data[['CRIME']].values

    def test_global(self):
        """Global Spatial_Pearson: association matrix, replicates, significance."""
        # fixed seed so the permutation replicates are reproducible
        numpy.random.seed(2478879)
        result = lee.Spatial_Pearson(connectivity=self.w.sparse).fit(self.x, self.y)
        # expected 2x2 spatial association matrix for (HOVAL, CRIME)
        known = numpy.array([[ 0.30136527, -0.23625603],
                             [-0.23625603, 0.53512008]])
        # NOTE(review): attribute is ``association_`` here but
        # ``associations_`` in test_local — confirm the estimator API
        # naming is intentional.
        numpy.testing.assert_allclose(known,result.association_, rtol=RTOL, atol=ATOL)
        # default 999 permutations, each replicate a 2x2 matrix
        numpy.testing.assert_array_equal(result.reference_distribution_.shape, (999,2,2))
        # first two permutation replicates under the fixed seed
        first_rep = numpy.array([[ 0.22803705, -0.08053692],
                                 [-0.08053692, 0.18897318]])

        second_rep = numpy.array([[ 0.14179274, -0.06962692],
                                  [-0.06962692, 0.13688337]])
        numpy.testing.assert_allclose(first_rep, result.reference_distribution_[0],
                                      rtol=RTOL, atol=ATOL)
        numpy.testing.assert_allclose(second_rep, result.reference_distribution_[1],
                                      rtol=RTOL, atol=ATOL)

        # pseudo p-values for each cell of the association matrix
        known_significance = numpy.array([[0.125, 0.026],
                                          [0.026, 0.001]])
        numpy.testing.assert_allclose(known_significance, result.significance_,
                                      rtol=RTOL, atol=ATOL)

    def test_local(self):
        """Local Spatial_Pearson_Local: one association and p-value per site."""
        # fixed seed so the permutation replicates are reproducible
        numpy.random.seed(2478879)
        result = lee.Spatial_Pearson_Local(connectivity=self.w.sparse).fit(self.x, self.y)
        # expected site-wise association components (one per observation)
        known_locals = numpy.array([ 0.10246023, -0.24169198, -0.1308714 ,
                                     0.00895543, -0.16080899, -0.00950808,
                                    -0.14615398, -0.0627634 , 0.00661232,
                                    -0.42354628, -0.73121006, 0.02060548,
                                     0.05187356, 0.06515283, -0.64400723,
                                    -0.37489818, -2.06573667, -0.10931854,
                                     0.50823848, -0.06338637, -0.10559429,
                                     0.03282849, -0.86618915, -0.62333825,
                                    -0.40910044,-0.41866868, -0.00702983,
                                    -0.4246288 , -0.52142507, -0.22481772,
                                     0.1931263 , -1.39355214, 0.02036755,
                                     0.22896308, -0.00240854, -0.30405211,
                                    -0.66950406, -0.21481868, -0.60320158,
                                    -0.38117303, -0.45584563, 0.32019362,
                                    -0.02818729, -0.02214172, 0.05587915,
                                     0.0295999 , -0.78818135, 0.16854472,
                                     0.2378127 ])
        numpy.testing.assert_allclose(known_locals, result.associations_,
                                      rtol=RTOL, atol=ATOL)
        # site-wise pseudo p-values under the fixed seed
        significances = numpy.array([0.154, 0.291, 0.358, 0.231, 0.146,
                                     0.335, 0.325, 0.388, 0.244, 0.111,
                                     0.019, 0.165, 0.136, 0.073, 0.014,
                                     0.029, 0.002, 0.376, 0.003, 0.265,
                                     0.449, 0.121, 0.072, 0.006, 0.036,
                                     0.06 , 0.355, 0.01 , 0.017, 0.168,
                                     0.022, 0.003, 0.217, 0.016, 0.337,
                                     0.137, 0.015, 0.128, 0.11 , 0.09 ,
                                     0.168, 0.031, 0.457, 0.44 , 0.141,
                                     0.249, 0.158, 0.018, 0.031])
        numpy.testing.assert_allclose(significances, result.significance_,
                                      rtol=RTOL, atol=ATOL)

# Assemble all test cases in this module into a single suite.
suite = unittest.TestSuite()
loader = unittest.TestLoader()
for test_case in (Lee_Tester,):
    suite.addTest(loader.loadTestsFromTestCase(test_case))

if __name__ == '__main__':
    unittest.TextTestRunner().run(suite)