diff --git a/docs/conf.py b/docs/conf.py
index 79ce90f1..dd5329cc 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -16,15 +16,15 @@
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
-import sys, os
-import sphinx_bootstrap_theme
-
+import os
+import sys
-sys.path.insert(0, os.path.abspath('../'))
+import sphinx_bootstrap_theme
-#import your package to obtain the version info to display on the docs website
-import esda
+sys.path.insert(0, os.path.abspath("../"))
+# import your package to obtain the version info to display on the docs website
+import esda # noqa E402
# -- General configuration ------------------------------------------------
@@ -34,43 +34,44 @@
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
-extensions = [#'sphinx_gallery.gen_gallery',
- 'sphinx.ext.autodoc',
- 'sphinx.ext.autosummary',
- 'sphinx.ext.viewcode',
- 'sphinxcontrib.bibtex',
- 'sphinx.ext.mathjax',
- 'sphinx.ext.doctest',
- 'sphinx.ext.intersphinx',
- 'numpydoc',
- 'matplotlib.sphinxext.plot_directive',
- 'nbsphinx']
+extensions = [ # 'sphinx_gallery.gen_gallery',
+ "sphinx.ext.autodoc",
+ "sphinx.ext.autosummary",
+ "sphinx.ext.viewcode",
+ "sphinxcontrib.bibtex",
+ "sphinx.ext.mathjax",
+ "sphinx.ext.doctest",
+ "sphinx.ext.intersphinx",
+ "numpydoc",
+ "matplotlib.sphinxext.plot_directive",
+ "nbsphinx",
+]
# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
+source_suffix = ".rst"
# The master toctree document.
-master_doc = 'index'
+master_doc = "index"
# General information about the project.
project = "esda" # string of your project name, for example, 'giddy'
-copyright = '2018, pysal developers'
-author = 'pysal developers'
+copyright = "2018, pysal developers"
+author = "pysal developers"
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The full version.
-version = esda.__version__ #should replace it with your PACKAGE_NAME
-release = esda.__version__ #should replace it with your PACKAGE_NAME
+version = esda.__version__ # should replace it with your PACKAGE_NAME
+release = esda.__version__ # should replace it with your PACKAGE_NAME
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
@@ -82,10 +83,10 @@
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This patterns also effect to html_static_path and html_extra_path
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'tests/*']
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "tests/*"]
# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False
@@ -96,13 +97,14 @@
# a list of builtin themes.
#
# html_theme = 'alabaster'
-html_theme = 'bootstrap'
+html_theme = "bootstrap"
html_theme_path = sphinx_bootstrap_theme.get_html_theme_path()
html_title = "%s v%s Manual" % (project, version)
-# (Optional) Logo of your package. Should be small enough to fit the navbar (ideally 24x24).
+# (Optional) Logo of your package.
+# Should be small enough to fit the navbar (ideally 24x24).
# Path should be relative to the ``_static`` files directory.
-#html_logo = "_static/images/package_logo.jpg"
+# html_logo = "_static/images/package_logo.jpg"
# (Optional) PySAL favicon
html_favicon = "_static/images/pysal_favicon.ico"
@@ -113,27 +115,20 @@
# documentation.
#
html_theme_options = {
-
# Navigation bar title. (Default: ``project`` value)
- 'navbar_title': "esda", # string of your project name, for example, 'giddy'
-
+ "navbar_title": "esda", # string of your project name, for example, 'giddy'
# Render the next and previous page links in navbar. (Default: true)
- 'navbar_sidebarrel': False,
-
+ "navbar_sidebarrel": False,
# Render the current pages TOC in the navbar. (Default: true)
- #'navbar_pagenav': True,
- #'navbar_pagenav': False,
-
+ # 'navbar_pagenav': True,
+ # 'navbar_pagenav': False,
# No sidebar
- 'nosidebar': True,
-
+ "nosidebar": True,
# Tab name for the current pages TOC. (Default: "Page")
- #'navbar_pagenav_name': "Page",
-
+ # 'navbar_pagenav_name': "Page",
# Global TOC depth for "site" navbar tab. (Default: 1)
# Switching to -1 shows all levels.
- 'globaltoc_depth': 2,
-
+ "globaltoc_depth": 2,
# Include hidden TOCs in Site navbar?
#
# Note: If this is "false", you cannot have mixed ``:hidden:`` and
@@ -141,54 +136,46 @@
# will break.
#
# Values: "true" (default) or "false"
- 'globaltoc_includehidden': "true",
-
+ "globaltoc_includehidden": "true",
# HTML navbar class (Default: "navbar") to attach to <div> element.
# For black navbar, do "navbar navbar-inverse"
- #'navbar_class': "navbar navbar-inverse",
-
+ # 'navbar_class': "navbar navbar-inverse",
# Fix navigation bar to top of page?
# Values: "true" (default) or "false"
- 'navbar_fixed_top': "true",
-
-
+ "navbar_fixed_top": "true",
# Location of link to source.
# Options are "nav" (default), "footer" or anything else to exclude.
- 'source_link_position': 'footer',
-
+ "source_link_position": "footer",
# Bootswatch (http://bootswatch.com/) theme.
#
# Options are nothing (default) or the name of a valid theme
# such as "amelia" or "cosmo", "yeti", "flatly".
- 'bootswatch_theme': "yeti",
-
+ "bootswatch_theme": "yeti",
# Choose Bootstrap version.
# Values: "3" (default) or "2" (in quotes)
- 'bootstrap_version': "3",
-
+ "bootstrap_version": "3",
# Navigation bar menu
- 'navbar_links': [
- ("Installation", "installation"),
- ("Tutorial", "tutorial"),
- ("API", "api"),
- ("References", "references"),
- ],
-
+ "navbar_links": [
+ ("Installation", "installation"),
+ ("Tutorial", "tutorial"),
+ ("API", "api"),
+ ("References", "references"),
+ ],
}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
# Custom sidebar templates, maps document names to template names.
-#html_sidebars = {}
+# html_sidebars = {}
# html_sidebars = {'sidebar': ['localtoc.html', 'sourcelink.html', 'searchbox.html']}
# -- Options for HTMLHelp output ------------------------------------------
# Output file base name for HTML help builder.
-htmlhelp_basename = 'esda'+'doc'
+htmlhelp_basename = "esda" + "doc"
# -- Options for LaTeX output ---------------------------------------------
@@ -197,15 +184,12 @@
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
-
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
-
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
-
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
@@ -215,8 +199,7 @@
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
- (master_doc, 'esda.tex', u'esda Documentation',
- u'pysal developers', 'manual'),
+ (master_doc, "esda.tex", "esda Documentation", "pysal developers", "manual"),
]
@@ -224,10 +207,7 @@
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
-man_pages = [
- (master_doc, 'esda', u'esda Documentation',
- [author], 1)
-]
+man_pages = [(master_doc, "esda", "esda Documentation", [author], 1)]
# -- Options for Texinfo output -------------------------------------------
@@ -236,9 +216,15 @@
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
- (master_doc, 'esda', u'esda Documentation',
- author, 'esda developers', 'One line description of project.',
- 'Miscellaneous'),
+ (
+ master_doc,
+ "esda",
+ "esda Documentation",
+ author,
+ "esda developers",
+ "One line description of project.",
+ "Miscellaneous",
+ ),
]
@@ -264,22 +250,34 @@ def setup(app):
# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {
- 'python': ('https://docs.python.org/3/', None),
- 'numpy': ('https://docs.scipy.org/doc/numpy', None),
- 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None),
- 'geopandas': ('https://geopandas.readthedocs.io/en/latest/', None),
- 'sklearn': ('https://scikit-learn.org/stable/', None),
- 'giddy': ('https://giddy.readthedocs.io/en/latest/', None),
- 'libpysal': ('https://pysal.org/libpysal/', None),
- 'esda': ('https://esda.readthedocs.io/en/latest/', None),
- 'region': ('https://region.readthedocs.io/en/latest/', None),
- 'hdbscan': ('https://hdbscan.readthedocs.io/en/latest/', None)
-
+ "python": ("https://docs.python.org/3/", None),
+ "numpy": ("https://docs.scipy.org/doc/numpy", None),
+ "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
+ "geopandas": ("https://geopandas.readthedocs.io/en/latest/", None),
+ "sklearn": ("https://scikit-learn.org/stable/", None),
+ "giddy": ("https://giddy.readthedocs.io/en/latest/", None),
+ "libpysal": ("https://pysal.org/libpysal/", None),
+ "esda": ("https://esda.readthedocs.io/en/latest/", None),
+ "region": ("https://region.readthedocs.io/en/latest/", None),
+ "hdbscan": ("https://hdbscan.readthedocs.io/en/latest/", None),
}
-numpydoc_xref_ignore = {'type', 'optional', 'default', 'shape', 'fitted', 'instance',
- 'cluster', 'of', 'or', 'if', 'using', 'otherwise', 'required',
- 'from'}
+numpydoc_xref_ignore = {
+ "type",
+ "optional",
+ "default",
+ "shape",
+ "fitted",
+ "instance",
+ "cluster",
+ "of",
+ "or",
+ "if",
+ "using",
+ "otherwise",
+ "required",
+ "from",
+}
# This is processed by Jinja2 and inserted before each notebook
@@ -306,7 +304,7 @@ def setup(app):
\nbsphinxstartnotebook{\scriptsize\noindent\strut
\textcolor{gray}{The following section was generated from
\sphinxcode{\sphinxupquote{\strut {{ docname | escape_latex }}}} \dotfill}}
-"""
+""" # noqa E501
# This is processed by Jinja2 and inserted after each notebook
nbsphinx_epilog = r"""
diff --git a/esda/__init__.py b/esda/__init__.py
index f12d3e4f..802c0fc8 100644
--- a/esda/__init__.py
+++ b/esda/__init__.py
@@ -3,40 +3,37 @@
=================================================
"""
-from . import adbscan
-from .gamma import Gamma
-from .geary import Geary
-from .geary_local import Geary_Local
-from .geary_local_mv import Geary_Local_MV
-from .getisord import G, G_Local
-from .join_counts import Join_Counts
-from .join_counts_local import Join_Counts_Local
-from .join_counts_local_bv import Join_Counts_Local_BV
-from .join_counts_local_mv import Join_Counts_Local_MV
-from .lee import Spatial_Pearson, Spatial_Pearson_Local
-from .losh import LOSH
-from .map_comparison import (
- external_entropy,
+from . import _version, adbscan, shape # noqa F401
+from .gamma import Gamma # noqa F401
+from .geary import Geary # noqa F401
+from .geary_local import Geary_Local # noqa F401
+from .geary_local_mv import Geary_Local_MV # noqa F401
+from .getisord import G, G_Local # noqa F401
+from .join_counts import Join_Counts # noqa F401
+from .join_counts_local import Join_Counts_Local # noqa F401
+from .join_counts_local_bv import Join_Counts_Local_BV # noqa F401
+from .join_counts_local_mv import Join_Counts_Local_MV # noqa F401
+from .lee import Spatial_Pearson, Spatial_Pearson_Local # noqa F401
+from .losh import LOSH # noqa F401
+from .map_comparison import ( # noqa F401
+ areal_entropy,
completeness,
+ external_entropy,
homogeneity,
overlay_entropy,
- areal_entropy,
)
-from .moran import (
+from .moran import ( # noqa F401
Moran,
Moran_BV,
Moran_BV_matrix,
Moran_Local,
Moran_Local_BV,
- Moran_Rate,
Moran_Local_Rate,
+ Moran_Rate,
)
-from .silhouettes import path_silhouette, boundary_silhouette
-from . import shape
-from .smaup import Smaup
-from .topo import prominence, isolation
-from .util import fdr
-
-from . import _version
+from .silhouettes import boundary_silhouette, path_silhouette # noqa F401
+from .smaup import Smaup # noqa F401
+from .topo import isolation, prominence # noqa F401
+from .util import fdr # noqa F401
__version__ = _version.get_versions()["version"]
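The trailing `# noqa F401` comments are the standard way to tell flake8 that these imports are intentional re-exports rather than unused names. A minimal sketch of the idiom, with hypothetical module and class names:

    # mypackage/__init__.py -- re-export pattern (hypothetical names)
    from ._core import PublicClass  # noqa F401  (imported only to re-export)
    from . import _version

    # PublicClass is now reachable as mypackage.PublicClass, and flake8
    # no longer flags the import as unused
    __version__ = _version.get_versions()["version"]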
diff --git a/esda/_version.py b/esda/_version.py
index d94bdb6d..bc5c4cfe 100644
--- a/esda/_version.py
+++ b/esda/_version.py
@@ -1,4 +1,3 @@
-
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by githubs download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
@@ -58,18 +57,19 @@ class NotThisMethod(Exception):
def register_vcs_handler(vcs, method): # decorator
"""Create decorator to mark a method as the handler of a VCS."""
+
def decorate(f):
"""Store f in HANDLERS[vcs][method]."""
if vcs not in HANDLERS:
HANDLERS[vcs] = {}
HANDLERS[vcs][method] = f
return f
+
return decorate
# pylint:disable=too-many-arguments,consider-using-with # noqa
-def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
- env=None):
+def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None):
"""Call the given command(s)."""
assert isinstance(commands, list)
process = None
@@ -77,10 +77,13 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
try:
dispcmd = str([command] + args)
# remember shell=False, so use git.cmd on windows, not just git
- process = subprocess.Popen([command] + args, cwd=cwd, env=env,
- stdout=subprocess.PIPE,
- stderr=(subprocess.PIPE if hide_stderr
- else None))
+ process = subprocess.Popen(
+ [command] + args,
+ cwd=cwd,
+ env=env,
+ stdout=subprocess.PIPE,
+ stderr=(subprocess.PIPE if hide_stderr else None),
+ )
break
except EnvironmentError:
e = sys.exc_info()[1]
@@ -115,15 +118,21 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
for _ in range(3):
dirname = os.path.basename(root)
if dirname.startswith(parentdir_prefix):
- return {"version": dirname[len(parentdir_prefix):],
- "full-revisionid": None,
- "dirty": False, "error": None, "date": None}
+ return {
+ "version": dirname[len(parentdir_prefix) :],
+ "full-revisionid": None,
+ "dirty": False,
+ "error": None,
+ "date": None,
+ }
rootdirs.append(root)
root = os.path.dirname(root) # up a level
if verbose:
- print("Tried directories %s but none started with prefix %s" %
- (str(rootdirs), parentdir_prefix))
+ print(
+ "Tried directories %s but none started with prefix %s"
+ % (str(rootdirs), parentdir_prefix)
+ )
raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
@@ -182,7 +191,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
- tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
+ tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)}
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d
@@ -191,7 +200,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
- tags = {r for r in refs if re.search(r'\d', r)}
+ tags = {r for r in refs if re.search(r"\d", r)}
if verbose:
print("discarding '%s', no digits" % ",".join(refs - tags))
if verbose:
@@ -199,24 +208,31 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
for ref in sorted(tags):
# sorting will prefer e.g. "2.0" over "2.0rc1"
if ref.startswith(tag_prefix):
- r = ref[len(tag_prefix):]
+ r = ref[len(tag_prefix) :]
# Filter out refs that exactly match prefix or that don't start
# with a number once the prefix is stripped (mostly a concern
# when prefix is '')
- if not re.match(r'\d', r):
+ if not re.match(r"\d", r):
continue
if verbose:
print("picking %s" % r)
- return {"version": r,
- "full-revisionid": keywords["full"].strip(),
- "dirty": False, "error": None,
- "date": date}
+ return {
+ "version": r,
+ "full-revisionid": keywords["full"].strip(),
+ "dirty": False,
+ "error": None,
+ "date": date,
+ }
# no suitable tags, so version is "0+unknown", but full hex is still there
if verbose:
print("no suitable tags, using unknown + full revision id")
- return {"version": "0+unknown",
- "full-revisionid": keywords["full"].strip(),
- "dirty": False, "error": "no suitable tags", "date": None}
+ return {
+ "version": "0+unknown",
+ "full-revisionid": keywords["full"].strip(),
+ "dirty": False,
+ "error": "no suitable tags",
+ "date": None,
+ }
@register_vcs_handler("git", "pieces_from_vcs")
@@ -231,8 +247,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
if sys.platform == "win32":
GITS = ["git.cmd", "git.exe"]
- _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root,
- hide_stderr=True)
+ _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True)
if rc != 0:
if verbose:
print("Directory %s not under git control" % root)
@@ -240,10 +255,19 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
# if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
# if there isn't one, this yields HEX[-dirty] (no NUM)
- describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty",
- "--always", "--long",
- "--match", "%s*" % tag_prefix],
- cwd=root)
+ describe_out, rc = runner(
+ GITS,
+ [
+ "describe",
+ "--tags",
+ "--dirty",
+ "--always",
+ "--long",
+ "--match",
+ "%s*" % tag_prefix,
+ ],
+ cwd=root,
+ )
# --long was added in git-1.5.5
if describe_out is None:
raise NotThisMethod("'git describe' failed")
@@ -258,8 +282,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
pieces["short"] = full_out[:7] # maybe improved later
pieces["error"] = None
- branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"],
- cwd=root)
+ branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root)
# --abbrev-ref was added in git-1.6.3
if rc != 0 or branch_name is None:
raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
@@ -299,17 +322,16 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
dirty = git_describe.endswith("-dirty")
pieces["dirty"] = dirty
if dirty:
- git_describe = git_describe[:git_describe.rindex("-dirty")]
+ git_describe = git_describe[: git_describe.rindex("-dirty")]
# now we have TAG-NUM-gHEX or HEX
if "-" in git_describe:
# TAG-NUM-gHEX
- mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+ mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe)
if not mo:
# unparseable. Maybe git-describe is misbehaving?
- pieces["error"] = ("unable to parse git-describe output: '%s'"
- % describe_out)
+ pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out
return pieces
# tag
@@ -318,10 +340,12 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
if verbose:
fmt = "tag '%s' doesn't start with prefix '%s'"
print(fmt % (full_tag, tag_prefix))
- pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
- % (full_tag, tag_prefix))
+ pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % (
+ full_tag,
+ tag_prefix,
+ )
return pieces
- pieces["closest-tag"] = full_tag[len(tag_prefix):]
+ pieces["closest-tag"] = full_tag[len(tag_prefix) :]
# distance: number of commits since tag
pieces["distance"] = int(mo.group(2))
@@ -370,8 +394,7 @@ def render_pep440(pieces):
rendered += ".dirty"
else:
# exception #1
- rendered = "0+untagged.%d.g%s" % (pieces["distance"],
- pieces["short"])
+ rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
return rendered
@@ -400,8 +423,7 @@ def render_pep440_branch(pieces):
rendered = "0"
if pieces["branch"] != "master":
rendered += ".dev0"
- rendered += "+untagged.%d.g%s" % (pieces["distance"],
- pieces["short"])
+ rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
return rendered
@@ -544,11 +566,13 @@ def render_git_describe_long(pieces):
def render(pieces, style):
"""Render the given version pieces into the requested style."""
if pieces["error"]:
- return {"version": "unknown",
- "full-revisionid": pieces.get("long"),
- "dirty": None,
- "error": pieces["error"],
- "date": None}
+ return {
+ "version": "unknown",
+ "full-revisionid": pieces.get("long"),
+ "dirty": None,
+ "error": pieces["error"],
+ "date": None,
+ }
if not style or style == "default":
style = "pep440" # the default
@@ -572,9 +596,13 @@ def render(pieces, style):
else:
raise ValueError("unknown style '%s'" % style)
- return {"version": rendered, "full-revisionid": pieces["long"],
- "dirty": pieces["dirty"], "error": None,
- "date": pieces.get("date")}
+ return {
+ "version": rendered,
+ "full-revisionid": pieces["long"],
+ "dirty": pieces["dirty"],
+ "error": None,
+ "date": pieces.get("date"),
+ }
def get_versions():
@@ -588,8 +616,7 @@ def get_versions():
verbose = cfg.verbose
try:
- return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
- verbose)
+ return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose)
except NotThisMethod:
pass
@@ -598,13 +625,16 @@ def get_versions():
# versionfile_source is the relative path from the top of the source
# tree (where the .git directory might live) to this file. Invert
# this to find the root from __file__.
- for _ in cfg.versionfile_source.split('/'):
+ for _ in cfg.versionfile_source.split("/"):
root = os.path.dirname(root)
except NameError:
- return {"version": "0+unknown", "full-revisionid": None,
- "dirty": None,
- "error": "unable to find root of source tree",
- "date": None}
+ return {
+ "version": "0+unknown",
+ "full-revisionid": None,
+ "dirty": None,
+ "error": "unable to find root of source tree",
+ "date": None,
+ }
try:
pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
@@ -618,6 +648,10 @@ def get_versions():
except NotThisMethod:
pass
- return {"version": "0+unknown", "full-revisionid": None,
- "dirty": None,
- "error": "unable to compute version", "date": None}
+ return {
+ "version": "0+unknown",
+ "full-revisionid": None,
+ "dirty": None,
+ "error": "unable to compute version",
+ "date": None,
+ }
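For context, `render` dispatches on `style` to turn the `pieces` dict extracted from `git describe` into a version string. A simplified sketch of the default pep440 style, mirroring (not reproducing) `render_pep440` above:

    def render_pep440_sketch(pieces):
        # pieces, e.g.: {"closest-tag": "2.4.1", "distance": 3,
        #                "short": "abc1234", "dirty": True}
        if pieces["closest-tag"]:
            rendered = pieces["closest-tag"]
            if pieces["distance"] or pieces["dirty"]:
                rendered += "+%d.g%s" % (pieces["distance"], pieces["short"])
                if pieces["dirty"]:
                    rendered += ".dirty"
        else:
            # exception #1: no tag at all
            rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
            if pieces["dirty"]:
                rendered += ".dirty"
        return rendered

    render_pep440_sketch(
        {"closest-tag": "2.4.1", "distance": 3, "short": "abc1234", "dirty": True}
    )  # -> '2.4.1+3.gabc1234.dirty'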
diff --git a/esda/adbscan.py b/esda/adbscan.py
index a1a0a39f..dbb69e26 100644
--- a/esda/adbscan.py
+++ b/esda/adbscan.py
@@ -5,14 +5,16 @@
__author__ = "Dani Arribas-Bel "
import warnings
-import pandas
+from collections import Counter
+
import numpy as np
+import pandas
from libpysal.cg.alpha_shapes import alpha_shape_auto
from scipy.spatial import cKDTree
-from collections import Counter
+from sklearn.base import BaseEstimator as _BaseEstimator
+from sklearn.base import ClusterMixin as _ClusterMixin
from sklearn.cluster import DBSCAN
from sklearn.neighbors import KNeighborsClassifier
-from sklearn.base import BaseEstimator as _BaseEstimator, ClusterMixin as _ClusterMixin
__all__ = ["ADBSCAN", "remap_lbls", "ensemble", "get_cluster_boundary"]
@@ -32,52 +34,50 @@ class ADBSCAN(_ClusterMixin, _BaseEstimator):
Parameters
----------
- eps : float
- The maximum distance between two samples for them to be considered
- as in the same neighborhood.
- min_samples : int
- The number of samples (or total weight) in a neighborhood
- for a point to be considered as a core point. This includes the
- point itself.
- algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional
- The algorithm to be used by the NearestNeighbors module
- to compute pointwise distances and find nearest neighbors.
- See NearestNeighbors module documentation for details.
- n_jobs : int
- [Optional. Default=1] The number of parallel jobs to run. If
- -1, then the number of jobs is set to the number of CPU
- cores.
- pct_exact : float
- [Optional. Default=0.1] Proportion of the entire dataset
- used to calculate DBSCAN in each draw
- reps : int
- [Optional. Default=100] Number of random samples to draw in order to
- build final solution
- keep_solus : Boolean
- [Optional. Default=False] If True, the `solus` and
- `solus_relabelled` objects are kept, else it is deleted to
- save memory
- pct_thr : float
- [Optional. Default=0.9] Minimum proportion of replications that a non-noise
- label need to be assigned to an observation for that observation to be labelled
- as such
+ eps : float
+ The maximum distance between two samples for them to be considered
+ as in the same neighborhood.
+ min_samples : int
+ The number of samples (or total weight) in a neighborhood
+ for a point to be considered as a core point. This includes the
+ point itself.
+ algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional
+ The algorithm to be used by the NearestNeighbors module
+ to compute pointwise distances and find nearest neighbors.
+ See NearestNeighbors module documentation for details.
+ n_jobs : int
+ [Optional. Default=1] The number of parallel jobs to run. If -1, then
+ the number of jobs is set to the number of CPU cores.
+ pct_exact : float
+ [Optional. Default=0.1] Proportion of the entire dataset
+ used to calculate DBSCAN in each draw
+ reps : int
+ [Optional. Default=100] Number of random samples to draw in order to
+ build final solution
+ keep_solus : bool
+ [Optional. Default=False] If True, the `solus` and `solus_relabelled`
+ objects are kept, else they are deleted to save memory
+ pct_thr : float
+ [Optional. Default=0.9] Minimum proportion of replications that
+ a non-noise label needs to be assigned to an observation for that
+ observation to be labelled as such
Attributes
----------
- labels_ : array
- [Only available after `fit`] Cluster labels for each point in the
- dataset given to fit().
- Noisy (if the proportion of the most common label is < pct_thr)
- samples are given the label -1.
- votes : DataFrame
- [Only available after `fit`] Table indexed on `X.index` with
- `labels_` under the `lbls` column, and the frequency across draws of
- that label under `pct`
- solus : DataFrame, shape = [n, reps]
- [Only available after `fit`] Each solution of labels for every draw
- solus_relabelled: DataFrame, shape = [n, reps]
- [Only available after `fit`] Each solution of labels for
- every draw, relabelled to be consistent across solutions
+ labels_ : array
+ [Only available after `fit`] Cluster labels for each point in the
+ dataset given to fit().
+ Noisy (if the proportion of the most common label is < pct_thr)
+ samples are given the label -1.
+ votes : DataFrame
+ [Only available after `fit`] Table indexed on `X.index` with
+ `labels_` under the `lbls` column, and the frequency across draws of
+ that label under `pct`
+ solus : DataFrame, shape = [n, reps]
+ [Only available after `fit`] Each solution of labels for every draw
+ solus_relabelled : DataFrame, shape = [n, reps]
+ [Only available after `fit`] Each solution of labels for
+ every draw, relabelled to be consistent across solutions
Examples
--------
@@ -185,14 +185,11 @@ def fit(self, X, y=None, sample_weight=None, xy=["X", "Y"]):
)
# Multi-core implementation of parallel draws
if (self.n_jobs == -1) or (self.n_jobs > 1):
- pool = _setup_pool(self.n_jobs)
# Set different parallel seeds!!!
- warn_msg = (
- "Multi-core implementation only works on "
- "relabelling solutions. Execution of draws "
- "is still sequential."
+ warnings.warn(
+ "Multi-core implementation only works on relabelling solutions. "
+ "Execution of draws is still sequential."
)
- warnings.warn(warn_msg)
for i in range(self.reps):
pars = (
n,
@@ -416,8 +413,10 @@ def ensemble(solus_relabelled):
4 7 0.67
"""
- f = lambda a: Counter(a).most_common(1)[0]
- counts = np.array(list(map(f, solus_relabelled.values)))
+
+ counts = np.array(
+ list(map(lambda a: Counter(a).most_common(1)[0], solus_relabelled.values))
+ )
winner = counts[:, 0]
votes = counts[:, 1].astype(int) / solus_relabelled.shape[1]
pred = pandas.DataFrame(
@@ -501,7 +500,8 @@ def get_cluster_boundary(labels, xys, xy=["X", "Y"], n_jobs=1, crs=None, step=1)
>>> polys = get_cluster_boundary(labels, db)
>>> polys[0].wkt
'POLYGON ((0.7217553174317995 0.8192869956700687, 0.7605307121989587 0.9086488808086682, 0.9177741225129434 0.8568503024577332, 0.8126209616521135 0.6262871483113925, 0.6125260668293881 0.5475861559192435, 0.5425443680112613 0.7546476915298572, 0.7217553174317995 0.8192869956700687))'
- """
+ """ # noqa E501
+
try:
from geopandas import GeoSeries
except ModuleNotFoundError:
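Given the re-wrapped ADBSCAN docstring above, a minimal usage sketch (the point coordinates and parameter values are illustrative; `fit` expects a DataFrame with the columns named in `xy`):

    import numpy as np
    import pandas
    from esda.adbscan import ADBSCAN

    np.random.seed(10)
    db = pandas.DataFrame(
        {"X": np.random.random(1000), "Y": np.random.random(1000)}
    )
    # eps/min_samples as in DBSCAN; reps controls how many draws go into
    # the ensemble, keep_solus retains the per-draw label tables
    clusterer = ADBSCAN(0.03, 10, reps=10, keep_solus=True)
    clusterer.fit(db)
    clusterer.votes.head()  # winning label and share of draws per point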
diff --git a/esda/crand.py b/esda/crand.py
index eeb7e615..d4413f4c 100644
--- a/esda/crand.py
+++ b/esda/crand.py
@@ -3,11 +3,12 @@
"""
import os
-import numpy as np
import warnings
+import numpy as np
+
try:
- from numba import njit, jit, prange, boolean
+ from numba import boolean, jit, njit, prange
except (ImportError, ModuleNotFoundError):
def jit(*dec_args, **dec_kwargs):
@@ -25,7 +26,6 @@ def intercepted_function(f, *f_args, **f_kwargs):
prange = range
boolean = bool
-import tempfile
__all__ = ["crand"]
@@ -119,8 +119,8 @@ def crand(
p_sim : ndarray
(N,) array with pseudo p-values from conditional permutation
rlocals : ndarray
- If keep=True, (N, permutations) array with simulated values of stat_func under the
- null of spatial randomness; else, empty (1, 1) array
+ If keep=True, (N, permutations) array with simulated values
+ of stat_func under the null of spatial randomness; else, empty (1, 1) array
"""
adj_matrix = w.sparse
@@ -153,28 +153,28 @@ def crand(
# we need to be careful to shuffle only *other* sites, not
# the self-site. This means we need to
- ### extract the self-weight, if any
+ # extract the self-weight, if any
self_weights = adj_matrix.diagonal()
- ### force the self-site weight to zero
+ # force the self-site weight to zero
with warnings.catch_warnings():
# massive changes to sparsity incur a cost, but it's not
# large for simply changing the diag
warnings.simplefilter("ignore")
adj_matrix.setdiag(0)
adj_matrix.eliminate_zeros()
- ### extract the weights from a now no-self-weighted adj_matrix
+ # extract the weights from a now no-self-weighted adj_matrix
other_weights = adj_matrix.data
- ### use the non-self weight as the cardinality, since
- ### this is the set we have to randomize.
- ### if there is a self-neighbor, we need to *not* shuffle the
- ### self neighbor, since conditional randomization conditions on site i.
+ # use the non-self weight as the cardinality, since
+ # this is the set we have to randomize.
+ # if there is a self-neighbor, we need to *not* shuffle the
+ # self neighbor, since conditional randomization conditions on site i.
cardinalities = np.array((adj_matrix != 0).sum(1)).flatten()
max_card = cardinalities.max()
permuted_ids = vec_permutations(max_card, n, permutations, seed)
if n_jobs != 1:
try:
- import joblib
+ import joblib # noqa F401
except (ModuleNotFoundError, ImportError):
warnings.warn(
f"Parallel processing is requested (n_jobs={n_jobs}),"
@@ -288,9 +288,9 @@ def compute_chunk(
scaling : float
Scaling value to apply to every local statistic
island_weight:
- value to use as a weight for the "fake" neighbor for every island. If numpy.nan,
- will propagate to the final local statistic depending on the `stat_func`. If 0, then
- the lag is always zero for islands.
+ value to use as a weight for the "fake" neighbor for every island.
+ If numpy.nan, will propagate to the final local statistic depending
+ on the `stat_func`. If 0, then the lag is always zero for islands.
Returns
-------
@@ -315,27 +315,19 @@ def compute_chunk(
for i in range(chunk_n):
cardinality = cardinalities[i]
if cardinality == 0: # deal with islands
- effective_cardinality = 1
weights_i = np.zeros(2, dtype=other_weights.dtype)
weights_i[1] = island_weight
else:
- effective_cardinality = cardinality
# we need to fix the self-weight to the first position
weights_i = np.zeros(cardinality + 1, dtype=other_weights.dtype)
weights_i[0] = self_weights[i]
- ### this chomps the next `cardinality` weights off of `weights`
- weights_i[1:] = other_weights[wloc : (wloc + cardinality)]
+ # this chomps the next `cardinality` weights off of `weights`
+ weights_i[1:] = other_weights[wloc : (wloc + cardinality)] # noqa E203
wloc += cardinality
- z_chunk_i = z_chunk[i]
mask[chunk_start + i] = False
- z_no_i = z[
- mask,
- ]
rstats = stat_func(chunk_start + i, z, permuted_ids, weights_i, scaling)
if keep:
- rlocals[
- i,
- ] = rstats
+ rlocals[i] = rstats
larger[i] = np.sum(rstats >= observed[i])
return larger, rlocals
@@ -370,7 +362,7 @@ def build_weights_offsets(cardinalities: np.ndarray, n_chunks: int):
chunk_size = np.int64(n / n_chunks) + 1
start = 0
for i in range(n_chunks):
- advance = cardinalities[start : start + chunk_size].sum()
+ advance = cardinalities[start : start + chunk_size].sum() # noqa E203
boundary_points[i + 1] = boundary_points[i] + advance
start += chunk_size
return boundary_points
@@ -432,11 +424,13 @@ def chunk_generator(
chunk_size = starts[1] - starts[0]
for i in range(n_jobs):
start = starts[i]
- z_chunk = z[start : (start + chunk_size)]
- self_weights_chunk = self_weights[start : (start + chunk_size)]
- observed_chunk = observed[start : (start + chunk_size)]
- cardinalities_chunk = cardinalities[start : (start + chunk_size)]
- w_chunk = other_weights[w_boundary_points[i] : w_boundary_points[i + 1]]
+ z_chunk = z[start : (start + chunk_size)] # noqa E203
+ self_weights_chunk = self_weights[start : (start + chunk_size)] # noqa E203
+ observed_chunk = observed[start : (start + chunk_size)] # noqa E203
+ cardinalities_chunk = cardinalities[start : (start + chunk_size)] # noqa E203
+ w_chunk = other_weights[
+ w_boundary_points[i] : w_boundary_points[i + 1] # noqa E203
+ ]
yield (
start,
z_chunk,
@@ -507,9 +501,9 @@ def parallel_crand(
scaling : float
Scaling value to apply to every local statistic
island_weight:
- value to use as a weight for the "fake" neighbor for every island. If numpy.nan,
- will propagate to the final local statistic depending on the `stat_func`. If 0, then
- the lag is always zero for islands.
+ value to use as a weight for the "fake" neighbor for every island.
+ If numpy.nan, will propagate to the final local statistic depending
+ on the `stat_func`. If 0, then the lag is always zero for islands.
Returns
-------
larger : ndarray
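The chunk generator above hands each worker a contiguous slice of the flattened weights. A self-contained sketch of the offset computation, same logic as `build_weights_offsets` in the hunk:

    import numpy as np

    def build_weights_offsets(cardinalities, n_chunks):
        # boundary_points[i] is where chunk i's weights begin in the
        # flattened adjacency data array
        n = cardinalities.shape[0]
        boundary_points = np.zeros(n_chunks + 1, dtype=np.int64)
        chunk_size = np.int64(n / n_chunks) + 1
        start = 0
        for i in range(n_chunks):
            advance = cardinalities[start : start + chunk_size].sum()
            boundary_points[i + 1] = boundary_points[i] + advance
            start += chunk_size
        return boundary_points

    build_weights_offsets(np.array([2, 3, 1, 4, 2, 2]), 2)
    # -> array([ 0, 10, 14]): chunk 0 owns weights [0:10), chunk 1 [10:14)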
diff --git a/esda/gamma.py b/esda/gamma.py
index 7ca8342c..f6733b87 100644
--- a/esda/gamma.py
+++ b/esda/gamma.py
@@ -5,16 +5,14 @@
"""
__author__ = "Luc Anselin Serge Rey "
+import warnings
+
import numpy as np
from libpysal.weights.spatial_lag import lag_spatial
+
+from .crand import _prepare_univariate
+from .crand import njit as _njit
from .tabular import _univariate_handler
-import warnings
-from .crand import (
- crand as _crand_plus,
- njit as _njit,
- _prepare_univariate,
- _prepare_bivariate,
-)
__all__ = ["Gamma"]
@@ -215,7 +213,7 @@ def __calc(self, z, op):
g = (z * zl).sum()
elif op == "s": # squared difference
zs = np.zeros(z.shape)
- z2 = z ** 2
+ z2 = z**2
for i, i0 in enumerate(self.w.id_order):
neighbors = self.w.neighbor_offsets[i0]
wijs = self.w.weights[i0]
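A short usage sketch for `Gamma`; the default operation is the cross-product branch (`op == "c"`) shown in `__calc`, and the attribute names follow the esda docstrings:

    import numpy as np
    import libpysal
    from esda.gamma import Gamma

    w = libpysal.weights.lat2W(4, 4)
    y = np.ones(16)
    y[0:8] = 0  # half the lattice 0, half 1
    g = Gamma(y, w)  # operation="c": sum of y_i * y_j over joined pairs
    g.g, g.p_sim_g   # observed statistic and pseudo p-value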
diff --git a/esda/geary.py b/esda/geary.py
index a8dda077..e1b07876 100644
--- a/esda/geary.py
+++ b/esda/geary.py
@@ -6,6 +6,7 @@
import numpy as np
import scipy.stats as stats
from libpysal import weights
+
from .tabular import _univariate_handler
__all__ = ["Geary"]
@@ -144,13 +145,13 @@ def __init__(self, y, w, transformation="r", permutations=999):
self.p_sim = (larger + 1.0) / (permutations + 1.0)
self.EC_sim = sum(sim) / permutations
self.seC_sim = np.array(sim).std()
- self.VC_sim = self.seC_sim ** 2
+ self.VC_sim = self.seC_sim**2
self.z_sim = (self.C - self.EC_sim) / self.seC_sim
self.p_z_sim = 1 - stats.norm.cdf(np.abs(self.z_sim))
@property
def _statistic(self):
- """ a standardized accessor for esda statistics"""
+ """a standardized accessor for esda statistics"""
return self.C
def __moments(self):
@@ -162,8 +163,8 @@ def __moments(self):
s2 = w.s2
s02 = s0 * s0
yd = y - y.mean()
- yd4 = yd ** 4
- yd2 = yd ** 2
+ yd4 = yd**4
+ yd2 = yd**2
n2 = n * n
k = (yd4.sum() / n) / ((yd2.sum() / n) ** 2)
A = (n - 1) * s1 * (n2 - 3 * n + 3 - (n - 1) * k)
@@ -191,27 +192,27 @@ def by_col(
Parameters
----------
- df : pandas.DataFrame
- a pandas dataframe with a geometry column
- cols : string or list of string
- name or list of names of columns to use to compute the statistic
- w : pysal weights object
- a weights object aligned with the dataframe. If not provided, this
- is searched for in the dataframe's metadata
- inplace : bool
- a boolean denoting whether to operate on the dataframe inplace or to
- return a series contaning the results of the computation. If
- operating inplace, with default configurations,
- the derived columns will be named like 'column_geary' and 'column_p_sim'
- pvalue : string
- a string denoting which pvalue should be returned. Refer to the
- the Geary statistic's documentation for available p-values
- outvals : list of strings
- list of arbitrary attributes to return as columns from the
- Geary statistic
- **stat_kws : keyword arguments
- options to pass to the underlying statistic. For this, see the
- documentation for the Geary statistic.
+ df : pandas.DataFrame
+ a pandas dataframe with a geometry column
+ cols : string or list of string
+ name or list of names of columns to use to compute the statistic
+ w : pysal weights object
+ a weights object aligned with the dataframe. If not provided, this
+ is searched for in the dataframe's metadata
+ inplace : bool
+ a boolean denoting whether to operate on the dataframe inplace or to
+ return a series containing the results of the computation. If
+ operating inplace, with default configurations,
+ the derived columns will be named like 'column_geary' and 'column_p_sim'
+ pvalue : string
+ a string denoting which pvalue should be returned. Refer to
+ the Geary statistic's documentation for available p-values
+ outvals : list of strings
+ list of arbitrary attributes to return as columns from the
+ Geary statistic
+ **stat_kws : dict
+ options to pass to the underlying statistic. For this, see the
+ documentation for the Geary statistic.
Returns
--------
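A minimal `Geary` example exercising the attributes set in `__init__` above (data and lattice are illustrative):

    import numpy as np
    import libpysal
    from esda.geary import Geary

    w = libpysal.weights.lat2W(5, 5)
    y = np.arange(25, dtype=float)
    c = Geary(y, w, transformation="r", permutations=99)
    c.C, c.p_sim  # observed Geary's C and its pseudo p-value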
diff --git a/esda/geary_local.py b/esda/geary_local.py
index 64e03024..728f316d 100644
--- a/esda/geary_local.py
+++ b/esda/geary_local.py
@@ -1,11 +1,10 @@
import numpy as np
import pandas as pd
-import warnings
-from scipy import sparse
-from scipy import stats
from sklearn.base import BaseEstimator
-import libpysal as lp
-from esda.crand import crand as _crand_plus, njit as _njit, _prepare_univariate
+
+from esda.crand import _prepare_univariate
+from esda.crand import crand as _crand_plus
+from esda.crand import njit as _njit
class Geary_Local(BaseEstimator):
@@ -63,10 +62,10 @@ def __init__(
of the function, since numba does not correctly
interpret external seeds nor
numpy.random.RandomState instances.
- island_weight:
- value to use as a weight for the "fake" neighbor for every island. If numpy.nan,
- will propagate to the final local statistic depending on the `stat_func`. If 0, then
- the lag is always zero for islands.
+ island_weight :
+ value to use as a weight for the "fake" neighbor for every island.
+ If numpy.nan, will propagate to the final local statistic depending
+ on the `stat_func`. If 0, then the lag is always zero for islands.
Attributes
----------
@@ -126,7 +125,6 @@ def fit(self, x):
sig = self.sig
keep_simulations = self.keep_simulations
n_jobs = self.n_jobs
- seed = self.seed
self.localG = self._statistic(x, w)
@@ -148,11 +146,13 @@ def fit(self, x):
# Create empty vector to fill
self.labs = np.empty(len(x)) * np.nan
# Outliers
- self.labs[(self.localG < Eij_mean) & (x > x_mean) & (self.p_sim <= sig)] = 1
+ locg_lt_eij = self.localG < Eij_mean
+ p_leq_sig = self.p_sim <= sig
+ self.labs[locg_lt_eij & (x > x_mean) & p_leq_sig] = 1
# Clusters
- self.labs[(self.localG < Eij_mean) & (x < x_mean) & (self.p_sim <= sig)] = 2
+ self.labs[locg_lt_eij & (x < x_mean) & p_leq_sig] = 2
# Other
- self.labs[(self.localG > Eij_mean) & (self.p_sim <= sig)] = 3
+ self.labs[(self.localG > Eij_mean) & p_leq_sig] = 3
# Non-significant
self.labs[self.p_sim > sig] = 4
@@ -191,7 +191,6 @@ def _statistic(x, w):
@_njit(fastmath=True)
def _local_geary(i, z, permuted_ids, weights_i, scaling):
- self_weight = weights_i[0]
other_weights = weights_i[1:]
zi, zrand = _prepare_univariate(i, z, permuted_ids, other_weights)
return (zi - zrand) ** 2 @ other_weights
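Usage sketch for the scikit-learn-style estimator (constructor keyword `connectivity` and the fitted attributes `localG`/`p_sim` as in the class above; data are illustrative):

    import numpy as np
    import libpysal
    from esda.geary_local import Geary_Local

    np.random.seed(10)
    w = libpysal.weights.lat2W(5, 5)
    y = np.random.normal(size=25)
    lg = Geary_Local(connectivity=w).fit(y)
    lg.localG[:3], lg.p_sim[:3]  # local statistics and pseudo p-values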
diff --git a/esda/geary_local_mv.py b/esda/geary_local_mv.py
index 16f4ceaf..f9dbccc6 100644
--- a/esda/geary_local_mv.py
+++ b/esda/geary_local_mv.py
@@ -157,15 +157,13 @@ def _crand(self, zvariables):
np.random.shuffle(idsi)
vars_rand = []
for j in range(nvars):
- vars_rand.append(zvariables[j][idsi[rids[:, 0 : wc[i]]]])
+ vars_rand.append(zvariables[j][idsi[rids[:, 0 : wc[i]]]]) # noqa E203
# vars rand as tmp
# Calculate diff
diff = []
for z in range(nvars):
- diff.append(
- (np.array((zvariables[z][i] - vars_rand[z]) ** 2 * w[i])).sum(1)
- / nvars
- )
+ _diff = np.array((zvariables[z][i] - vars_rand[z]) ** 2 * w[i])
+ diff.append(_diff.sum(1) / nvars)
# add up differences
temp = np.array([sum(x) for x in zip(*diff)])
# Assign to object to be returned
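The multivariate statistic averages the squared differences across variables for each permutation, which is what the rewritten `diff` accumulation above computes. A usage sketch, assuming `fit` accepts a list of variables as in the esda docstrings:

    import numpy as np
    import libpysal
    from esda.geary_local_mv import Geary_Local_MV

    np.random.seed(10)
    w = libpysal.weights.lat2W(5, 5)
    x1 = np.random.normal(size=25)
    x2 = np.random.normal(size=25)
    lg_mv = Geary_Local_MV(connectivity=w).fit([x1, x2])
    lg_mv.localG[:3], lg_mv.p_sim[:3]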
diff --git a/esda/getisord.py b/esda/getisord.py
index c87231d7..7390f976 100644
--- a/esda/getisord.py
+++ b/esda/getisord.py
@@ -5,11 +5,15 @@
__all__ = ["G", "G_Local"]
import warnings
+
from libpysal.common import np, stats
from libpysal.weights.spatial_lag import lag_spatial as slag
from libpysal.weights.util import fill_diagonal
+
+from .crand import _prepare_univariate
+from .crand import crand as _crand_plus
+from .crand import njit as _njit
from .tabular import _univariate_handler
-from .crand import njit as _njit, crand as _crand_plus, _prepare_univariate
PERMUTATIONS = 999
@@ -29,40 +33,40 @@ class G(object):
Attributes
----------
- y : array
- original variable
- w : W
- DistanceBand W spatial weights based on distance band
- permutation : int
- the number of permutations
- G : float
- the value of statistic
- EG : float
- the expected value of statistic
- VG : float
- the variance of G under normality assumption
- z_norm : float
- standard normal test statistic
- p_norm : float
- p-value under normality assumption (one-sided)
- sim : array
- (if permutations > 0)
- vector of G values for permutated samples
- p_sim : float
- p-value based on permutations (one-sided)
- null: spatial randomness
- alternative: the observed G is extreme it is either extremely high or extremely low
- EG_sim : float
- average value of G from permutations
- VG_sim : float
- variance of G from permutations
- seG_sim : float
- standard deviation of G under permutations.
- z_sim : float
- standardized G based on permutations
- p_z_sim : float
- p-value based on standard normal approximation from
- permutations (one-sided)
+ y : array
+ original variable
+ w : W
+ DistanceBand W spatial weights based on distance band
+ permutations : int
+ the number of permutations
+ G : float
+ the value of statistic
+ EG : float
+ the expected value of statistic
+ VG : float
+ the variance of G under normality assumption
+ z_norm : float
+ standard normal test statistic
+ p_norm : float
+ p-value under normality assumption (one-sided)
+ sim : array
+ (if permutations > 0)
+ vector of G values for permuted samples
+ p_sim : float
+ p-value based on permutations (one-sided)
+ null: spatial randomness
+ alternative: the observed G is extreme; it is either
+ extremely high or extremely low
+ EG_sim : float
+ average value of G from permutations
+ VG_sim : float
+ variance of G from permutations
+ seG_sim : float
+ standard deviation of G under permutations.
+ z_sim : float
+ standardized G based on permutations
+ p_z_sim : float
+ p-value based on standard normal approximation from permutations (one-sided)
Notes
-----
@@ -134,7 +138,7 @@ def __init__(self, y, w, permutations=PERMUTATIONS):
self.p_sim = (larger + 1.0) / (permutations + 1.0)
self.EG_sim = sum(sim) / permutations
self.seG_sim = sim.std()
- self.VG_sim = self.seG_sim ** 2
+ self.VG_sim = self.seG_sim**2
self.z_sim = (self.G - self.EG_sim) / self.seG_sim
self.p_z_sim = 1.0 - stats.norm.cdf(np.abs(self.z_sim))
@@ -166,7 +170,7 @@ def __moments(self):
EG2NUM = EG2
EG2DEN = ((sum(y) ** 2 - sum(y2)) ** 2) * n * (n - 1) * (n - 2) * (n - 3)
self.EG2 = EG2NUM / EG2DEN
- self.VG = self.EG2 - self.EG ** 2
+ self.VG = self.EG2 - self.EG**2
def __calc(self, y):
yl = slag(self.w, y)
@@ -175,7 +179,7 @@ def __calc(self, y):
@property
def _statistic(self):
- """ Standardized accessor for esda statistics"""
+ """Standardized accessor for esda statistics"""
return self.G
@classmethod
@@ -187,31 +191,31 @@ def by_col(
Parameters
----------
- df : pandas.DataFrame
- a pandas dataframe with a geometry column
- cols : string or list of string
- name or list of names of columns to use to compute the statistic
- w : pysal weights object
- a weights object aligned with the dataframe. If not provided, this
- is searched for in the dataframe's metadata
- inplace : bool
- a boolean denoting whether to operate on the dataframe inplace or to
- return a series contaning the results of the computation. If
- operating inplace, the derived columns will be named 'column_g'
- pvalue : string
- a string denoting which pvalue should be returned. Refer to the
- the G statistic's documentation for available p-values
- outvals : list of strings
- list of arbitrary attributes to return as columns from the
- G statistic
- **stat_kws : keyword arguments
- options to pass to the underlying statistic. For this, see the
- documentation for the G statistic.
+ df : pandas.DataFrame
+ a pandas dataframe with a geometry column
+ cols : string or list of string
+ name or list of names of columns to use to compute the statistic
+ w : pysal weights object
+ a weights object aligned with the dataframe. If not provided, this
+ is searched for in the dataframe's metadata
+ inplace : bool
+ a boolean denoting whether to operate on the dataframe inplace or to
+ return a series containing the results of the computation. If
+ operating inplace, the derived columns will be named 'column_g'
+ pvalue : string
+ a string denoting which pvalue should be returned. Refer to
+ the G statistic's documentation for available p-values
+ outvals : list of strings
+ list of arbitrary attributes to return as columns from the G statistic
+ **stat_kws : dict
+ options to pass to the underlying statistic. For this, see the
+ documentation for the G statistic.
Returns
--------
- If inplace, None, and operation is conducted on dataframe in memory. Otherwise,
- returns a copy of the dataframe with the relevant columns attached.
+ If inplace, None, and operation is conducted on the dataframe
+ in memory. Otherwise, returns a copy of the dataframe with
+ the relevant columns attached.
"""
return _univariate_handler(
@@ -250,9 +254,9 @@ class G_Local(object):
provided in binary form, and standardization/self-weighting will be
handled by the function itself.
island_weight:
- value to use as a weight for the "fake" neighbor for every island. If numpy.nan,
- will propagate to the final local statistic depending on the `stat_func`. If 0, then
- the lag is always zero for islands.
+ value to use as a weight for the "fake" neighbor for every island.
+ If numpy.nan, will propagate to the final local statistic depending
+ on the `stat_func`. If 0, then the lag is always zero for islands.
Attributes
----------
@@ -279,9 +283,10 @@ class G_Local(object):
of arrays of floats (if permutations>0), vector of I values
for permutated samples
p_sim : array
- of floats, p-value based on permutations (one-sided)
- null - spatial randomness
- alternative - the observed G is extreme it is either extremely high or extremely low
+ of floats, p-value based on permutations (one-sided)
+ null - spatial randomness
+ alternative - the observed G is extreme
+ (it is either extremely high or extremely low)
EG_sim : array
of floats, average value of G from permutations
VG_sim : array
@@ -440,7 +445,6 @@ def __crand(self, keep_simulations):
k = self.w.max_neighbors + 1
rids = np.array([np.random.permutation(rid)[0:k] for i in prange])
ids = np.arange(self.w.n)
- ido = self.w.id_order
wc = self.__getCardinalities()
if self.w_transform == "r":
den = np.array(wc) + self.star
@@ -486,19 +490,18 @@ def calc(self):
empirical_mean = (y.sum() - y * remove_self) / N
# variance looks complex, yes, but it obtains from E[x^2] - E[x]^2.
# So, break it down to allow subtraction of the self-neighbor.
- mean_of_squares = ((y ** 2).sum() - (y ** 2) * remove_self) / N
- empirical_variance = mean_of_squares - empirical_mean ** 2
+ mean_of_squares = ((y**2).sum() - (y**2) * remove_self) / N
+ empirical_variance = mean_of_squares - empirical_mean**2
# Since we have corrected the diagonal, this should work
cardinality = np.asarray(W.sum(axis=1)).squeeze()
expected_value = cardinality / N
- expected_variance = (
- cardinality
- * (N - cardinality)
- / (N - 1)
- * (1 / N ** 2)
- * (empirical_variance / (empirical_mean ** 2))
- )
+
+ expected_variance = cardinality * (N - cardinality)
+ expected_variance /= N - 1
+ expected_variance *= 1 / N**2
+ expected_variance *= empirical_variance / empirical_mean**2
+
z_scores = (statistic - expected_value) / np.sqrt(expected_variance)
self.Gs = statistic
@@ -520,26 +523,26 @@ def by_col(
Parameters
----------
- df : pandas.DataFrame
- a pandas dataframe with a geometry column
- cols : string or list of string
- name or list of names of columns to use to compute the statistic
- w : pysal weights object
- a weights object aligned with the dataframe. If not provided, this
- is searched for in the dataframe's metadata
- inplace : bool
- a boolean denoting whether to operate on the dataframe inplace or to
- return a series contaning the results of the computation. If
- operating inplace, the derived columns will be named 'column_g_local'
- pvalue : string
- a string denoting which pvalue should be returned. Refer to the
- the G_Local statistic's documentation for available p-values
- outvals : list of strings
- list of arbitrary attributes to return as columns from the
- G_Local statistic
- **stat_kws : keyword arguments
- options to pass to the underlying statistic. For this, see the
- documentation for the G_Local statistic.
+ df : pandas.DataFrame
+ a pandas dataframe with a geometry column
+ cols : string or list of string
+ name or list of names of columns to use to compute the statistic
+ w : pysal weights object
+ a weights object aligned with the dataframe. If not provided, this
+ is searched for in the dataframe's metadata
+ inplace : bool
+ a boolean denoting whether to operate on the dataframe inplace or to
+ return a series containing the results of the computation. If
+ operating inplace, the derived columns will be named 'column_g_local'
+ pvalue : string
+ a string denoting which pvalue should be returned. Refer to
+ the G_Local statistic's documentation for available p-values
+ outvals : list of strings
+ list of arbitrary attributes to return as columns from the
+ G_Local statistic
+ **stat_kws : dict
+ options to pass to the underlying statistic. For this, see the
+ documentation for the G_Local statistic.
Returns
-------
@@ -608,7 +611,7 @@ def _infer_star_and_structure_w(weights, star, transform):
else: # star was something else, so try to fill the weights with it
try:
weights = fill_diagonal(weights, star)
- except:
+ except TypeError:
raise TypeError(
f"Type of star ({type(star)}) not understood."
f" Must be an integer, boolean, float, or numpy.ndarray."
@@ -640,9 +643,12 @@ def _g_local_star_crand(i, z, permuted_ids, weights_i, scaling):
if __name__ == "__main__":
- import geopandas, numpy, esda, importlib
- import matplotlib.pyplot as plt
- from libpysal import weights, examples
+
+ import geopandas
+ import numpy
+ from libpysal import examples, weights
+
+ import esda
df = geopandas.read_file(examples.get_path("NAT.shp"))
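For reference, the classic `G`/`G_Local` usage the reformatted docstrings describe; the point set and values mirror the esda docstring examples and are purely illustrative:

    import numpy as np
    from libpysal.weights import DistanceBand
    from esda.getisord import G, G_Local

    points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
    w = DistanceBand(points, threshold=15)
    y = np.array([2, 3, 3.2, 5, 8, 7])
    g = G(y, w)  # global Getis-Ord G
    g.G, g.p_norm
    lg = G_Local(y, w, transform="B")  # local G_i, binary weights
    lg.Gs[:3], lg.p_sim[:3]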
diff --git a/esda/join_counts.py b/esda/join_counts.py
index 7d46c4d4..5d5d888e 100644
--- a/esda/join_counts.py
+++ b/esda/join_counts.py
@@ -4,18 +4,12 @@
"""
__author__ = "Sergio J. Rey , Luc Anselin "
-from libpysal.weights.spatial_lag import lag_spatial
-from .tabular import _univariate_handler
-from scipy.stats import chi2_contingency
-from scipy.stats import chi2
import numpy as np
import pandas as pd
-from .crand import (
- crand as _crand_plus,
- njit as _njit,
- _prepare_univariate,
- _prepare_bivariate,
-)
+from scipy.stats import chi2, chi2_contingency
+
+from .crand import njit as _njit
+from .tabular import _univariate_handler
__all__ = ["Join_Counts"]
@@ -258,32 +252,33 @@ def by_col(
Parameters
----------
- df : pandas.DataFrame
- a pandas dataframe with a geometry column
- cols : string or list of string
- name or list of names of columns to use to compute the statistic
- w : pysal weights object
- a weights object aligned with the dataframe. If not provided, this
- is searched for in the dataframe's metadata
- inplace : bool
- a boolean denoting whether to operate on the dataframe inplace or to
- return a series contaning the results of the computation. If
- operating inplace, the derived columns will be named
- 'column_join_count'
- pvalue : string
- a string denoting which pvalue should be returned. Refer to the
- the Join_Count statistic's documentation for available p-values
- outvals : list of strings
- list of arbitrary attributes to return as columns from the
- Join_Count statistic
- **stat_kws : keyword arguments
- options to pass to the underlying statistic. For this, see the
- documentation for the Join_Count statistic.
+ df : pandas.DataFrame
+ a pandas dataframe with a geometry column
+ cols : string or list of string
+ name or list of names of columns to use to compute the statistic
+ w : pysal weights object
+ a weights object aligned with the dataframe. If not provided, this
+ is searched for in the dataframe's metadata
+ inplace : bool
+ a boolean denoting whether to operate on the dataframe inplace or to
+ return a series containing the results of the computation. If
+ operating inplace, the derived columns will be named
+ 'column_join_count'
+ pvalue : string
+ a string denoting which pvalue should be returned. Refer to
+ the Join_Count statistic's documentation for available p-values
+ outvals : list of strings
+ list of arbitrary attributes to return as columns from the
+ Join_Count statistic
+ **stat_kws : dict
+ options to pass to the underlying statistic. For this, see the
+ documentation for the Join_Count statistic.
Returns
--------
- If inplace, None, and operation is conducted on dataframe in memory. Otherwise,
- returns a copy of the dataframe with the relevant columns attached.
+ If inplace, None, and operation is conducted on
+ the dataframe in memory. Otherwise, returns a copy of the
+ dataframe with the relevant columns attached.
"""
if outvals is None:
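A binary-variable sketch for `Join_Counts` (attribute names `bb`, `ww`, `bw` per the esda docstrings: black-black, white-white, and black-white joins):

    import numpy as np
    import libpysal
    from esda.join_counts import Join_Counts

    w = libpysal.weights.lat2W(4, 4)
    y = np.ones(16)
    y[0:8] = 0  # two homogeneous halves of the lattice
    jc = Join_Counts(y, w)
    jc.bb, jc.ww, jc.bw  # 10.0, 10.0, 4.0 joins for this split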
diff --git a/esda/join_counts_local.py b/esda/join_counts_local.py
index 379be4f1..88321c1f 100644
--- a/esda/join_counts_local.py
+++ b/esda/join_counts_local.py
@@ -1,9 +1,11 @@
import numpy as np
import pandas as pd
-from sklearn.base import BaseEstimator
from libpysal import weights
-from esda.crand import crand as _crand_plus, njit as _njit, _prepare_univariate
+from sklearn.base import BaseEstimator
+from esda.crand import _prepare_univariate
+from esda.crand import crand as _crand_plus
+from esda.crand import njit as _njit
PERMUTATIONS = 999
@@ -26,30 +28,29 @@ def __init__(
Parameters
----------
- connectivity : scipy.sparse matrix object
- the connectivity structure describing
- the relationships between observed units.
- Need not be row-standardized.
- permutations : int
- number of random permutations for calculation of pseudo
- p_values
- n_jobs : int
- Number of cores to be used in the conditional randomisation. If -1,
- all available cores are used.
- keep_simulations : Boolean
- (default=True)
- If True, the entire matrix of replications under the null
- is stored in memory and accessible; otherwise, replications
- are not saved
- seed : None/int
- Seed to ensure reproducibility of conditional randomizations.
- Must be set here, and not outside of the function, since numba
- does not correctly interpret external seeds
- nor numpy.random.RandomState instances.
+ connectivity : scipy.sparse matrix object
+ the connectivity structure describing
+ the relationships between observed units.
+ Need not be row-standardized.
+ permutations : int
+ number of random permutations for calculation of pseudo
+ p_values
+ n_jobs : int
+            Number of cores to be used in the conditional randomization. If -1,
+ all available cores are used.
+ keep_simulations : bool (default True)
+ If True, the entire matrix of replications under the null
+ is stored in memory and accessible; otherwise, replications
+ are not saved
+ seed : None/int
+ Seed to ensure reproducibility of conditional randomizations.
+ Must be set here, and not outside of the function, since numba
+ does not correctly interpret external seeds
+ nor numpy.random.RandomState instances.
island_weight:
- value to use as a weight for the "fake" neighbor for every island. If numpy.nan,
- will propagate to the final local statistic depending on the `stat_func`. If 0, then
- the lag is always zero for islands.
+ value to use as a weight for the "fake" neighbor for every island.
+ If numpy.nan, will propagate to the final local statistic depending
+ on the `stat_func`. If 0, then the lag is always zero for islands.
Attributes
----------
@@ -116,7 +117,7 @@ def fit(self, y, n_jobs=1, permutations=999):
keep_simulations = self.keep_simulations
n_jobs = self.n_jobs
- seed = self.seed
+ # seed = self.seed
self.y = y
self.n = len(y)
@@ -167,7 +168,7 @@ def _statistic(y, w):
@_njit(fastmath=True)
def _ljc_uni(i, z, permuted_ids, weights_i, scaling):
- self_weight = weights_i[0]
+ # self_weight = weights_i[0]
other_weights = weights_i[1:]
zi, zrand = _prepare_univariate(i, z, permuted_ids, other_weights)
return zi * (zrand @ other_weights)
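A compact usage sketch for the estimator this file defines; the lattice weights and binary variable are toy stand-ins:

    import numpy as np
    import libpysal
    from esda.join_counts_local import Join_Counts_Local

    w = libpysal.weights.lat2W(5, 5)                  # toy 5x5 lattice
    y = np.random.default_rng(42).integers(0, 2, 25)  # binary variable

    ljc = Join_Counts_Local(connectivity=w, seed=12345).fit(y)
    ljc.LJC    # local join counts, one per observation
    ljc.p_sim  # pseudo p-values from conditional permutation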
diff --git a/esda/join_counts_local_bv.py b/esda/join_counts_local_bv.py
index f4ccb113..2da6c9a2 100644
--- a/esda/join_counts_local_bv.py
+++ b/esda/join_counts_local_bv.py
@@ -1,16 +1,11 @@
import numpy as np
import pandas as pd
-import warnings
-from scipy import sparse
-from sklearn.base import BaseEstimator
from libpysal import weights
-from esda.crand import (
- crand as _crand_plus,
- njit as _njit,
- _prepare_univariate,
- _prepare_bivariate,
-)
+from sklearn.base import BaseEstimator
+from esda.crand import _prepare_bivariate, _prepare_univariate
+from esda.crand import crand as _crand_plus
+from esda.crand import njit as _njit
PERMUTATIONS = 999
@@ -33,30 +28,28 @@ def __init__(
Parameters
----------
- connectivity : scipy.sparse matrix object
- the connectivity structure describing
- the relationships between observed units.
- Need not be row-standardized.
- permutations : int
- number of random permutations for calculation of pseudo
- p_values
- n_jobs : int
- Number of cores to be used in the conditional randomisation. If -1,
- all available cores are used.
- keep_simulations : Boolean
- (default=True)
- If True, the entire matrix of replications under the null
- is stored in memory and accessible; otherwise, replications
- are not saved
- seed : None/int
- Seed to ensure reproducibility of conditional randomizations.
- Must be set here, and not outside of the function, since numba
- does not correctly interpret external seeds
- nor numpy.random.RandomState instances.
+ connectivity : scipy.sparse matrix object
+ the connectivity structure describing
+ the relationships between observed units.
+ Need not be row-standardized.
+ permutations : int
+ number of random permutations for calculation of pseudo p_values
+ n_jobs : int
+            Number of cores to be used in the conditional randomization.
+ If -1, all available cores are used.
+ keep_simulations : bool (default True)
+ If True, the entire matrix of replications under the null
+ is stored in memory and accessible; otherwise, replications
+ are not saved
+ seed : None/int
+ Seed to ensure reproducibility of conditional randomizations.
+ Must be set here, and not outside of the function, since numba
+ does not correctly interpret external seeds
+ nor numpy.random.RandomState instances.
island_weight:
- value to use as a weight for the "fake" neighbor for every island. If numpy.nan,
- will propagate to the final local statistic depending on the `stat_func`. If 0, then
- the lag is always zero for islands.
+ value to use as a weight for the "fake" neighbor for every island.
+ If numpy.nan, will propagate to the final local statistic depending
+ on the `stat_func`. If 0, then the lag is always zero for islands.
"""
self.connectivity = connectivity
@@ -99,9 +92,14 @@ def fit(self, x, z, case="CLC", n_jobs=1, permutations=999):
Commpop data replicating GeoDa tutorial (Case 1)
>>> import libpysal
>>> import geopandas as gpd
- >>> commpop = gpd.read_file("https://github.com/jeffcsauer/GSOC2020/raw/master/validation/data/commpop.gpkg")
+ >>> commpop = gpd.read_file(
+ ... "https://github.com/jeffcsauer/GSOC2020/raw/master/"
+ ... "validation/data/commpop.gpkg"
+ ... )
>>> w = libpysal.weights.Queen.from_dataframe(commpop)
- >>> LJC_BV_Case1 = Local_Join_Counts_BV(connectivity=w).fit(commpop['popneg'], commpop['popplus'], case='BJC')
+    >>> LJC_BV_Case1 = Join_Counts_Local_BV(
+ ... connectivity=w
+ ... ).fit(commpop['popneg'], commpop['popplus'], case='BJC')
>>> LJC_BV_Case1.LJC
>>> LJC_BV_Case1.p_sim
@@ -115,12 +113,13 @@ def fit(self, x, z, case="CLC", n_jobs=1, permutations=999):
>>> guerry_ds.loc[(guerry_ds['Infants'] > 23574), 'infq5'] = 1
>>> guerry_ds.loc[(guerry_ds['Donatns'] > 10973), 'donq5'] = 1
>>> w = libpysal.weights.Queen.from_dataframe(guerry_ds)
- >>> LJC_BV_Case2 = Local_Join_Counts_BV(connectivity=w).fit(guerry_ds['infq5'], guerry_ds['donq5'], case='CLC')
+    >>> LJC_BV_Case2 = Join_Counts_Local_BV(
+ ... connectivity=w
+ ... ).fit(guerry_ds['infq5'], guerry_ds['donq5'], case='CLC')
>>> LJC_BV_Case2.LJC
>>> LJC_BV_Case2.p_sim
"""
- # Need to ensure that the np.array() are of
- # dtype='float' for numba
+        # Need to ensure that the np.array()s are of dtype='float' for numba
x = np.array(x, dtype="float")
z = np.array(z, dtype="float")
@@ -135,9 +134,7 @@ def fit(self, x, z, case="CLC", n_jobs=1, permutations=999):
self.w = w
self.case = case
- keep_simulations = self.keep_simulations
n_jobs = self.n_jobs
- seed = self.seed
self.LJC = self._statistic(x, z, w, case=case)
@@ -182,8 +179,7 @@ def _statistic(x, z, w, case):
# different from the esda.Join_Counts() function.
adj_list = w.to_adjlist(remove_symmetric=False)
- # First, set up a series that maps the values
- # to the weights table
+ # First, set up a series that maps the values to the weights table
zseries_x = pd.Series(x, index=w.id_order)
zseries_z = pd.Series(z, index=w.id_order)
@@ -233,7 +229,6 @@ def _statistic(x, z, w, case):
def _ljc_bv_case1(i, z, permuted_ids, weights_i, scaling):
zx = z[:, 0]
zy = z[:, 1]
- self_weight = weights_i[0]
other_weights = weights_i[1:]
zyi, zyrand = _prepare_univariate(i, zy, permuted_ids, other_weights)
return zx[i] * (zyrand @ other_weights)
@@ -241,9 +236,7 @@ def _ljc_bv_case1(i, z, permuted_ids, weights_i, scaling):
@_njit(fastmath=True)
def _ljc_bv_case2(i, z, permuted_ids, weights_i, scaling):
- zx = z[:, 0]
zy = z[:, 1]
- self_weight = weights_i[0]
other_weights = weights_i[1:]
zxi, zxrand, zyi, zyrand = _prepare_bivariate(i, z, permuted_ids, other_weights)
zf = zxrand * zyrand
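The `case` argument in the doctests above selects between the two bivariate variants; a sketch with toy data (all names hypothetical, with `z_bjc` built as the complement of `x` so the two variables never co-locate, as the BJC case assumes):

    import numpy as np
    import libpysal
    from esda.join_counts_local_bv import Join_Counts_Local_BV

    w = libpysal.weights.lat2W(5, 5)
    rng = np.random.default_rng(0)
    x = rng.integers(0, 2, 25)      # binary variable
    z_bjc = 1 - x                   # never co-located with x
    z_clc = rng.integers(0, 2, 25)  # independent binary variable

    bjc = Join_Counts_Local_BV(connectivity=w).fit(x, z_bjc, case="BJC")
    clc = Join_Counts_Local_BV(connectivity=w).fit(x, z_clc, case="CLC")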
diff --git a/esda/join_counts_local_mv.py b/esda/join_counts_local_mv.py
index 49e8fec6..0f52b718 100644
--- a/esda/join_counts_local_mv.py
+++ b/esda/join_counts_local_mv.py
@@ -1,18 +1,17 @@
import numpy as np
import pandas as pd
-from scipy import sparse
-from sklearn.base import BaseEstimator
from libpysal import weights
-from esda.crand import crand as _crand_plus, njit as _njit, _prepare_univariate
+from sklearn.base import BaseEstimator
+from esda.crand import _prepare_univariate
+from esda.crand import crand as _crand_plus
+from esda.crand import njit as _njit
PERMUTATIONS = 999
class Join_Counts_Local_MV(BaseEstimator):
-
"""Multivariate Local Join Count Statistic"""
-
def __init__(
self,
connectivity=None,
@@ -27,30 +26,27 @@ def __init__(
Parameters
----------
- connectivity : scipy.sparse matrix object
- the connectivity structure describing
- the relationships between observed units.
- Need not be row-standardized.
- permutations : int
- number of random permutations for calculation of pseudo
- p_values
- n_jobs : int
- Number of cores to be used in the conditional randomisation. If -1,
- all available cores are used.
- keep_simulations : Boolean
- (default=True)
- If True, the entire matrix of replications under the null
- is stored in memory and accessible; otherwise, replications
- are not saved
- seed : None/int
- Seed to ensure reproducibility of conditional randomizations.
- Must be set here, and not outside of the function, since numba
- does not correctly interpret external seeds
- nor numpy.random.RandomState instances.
- island_weight:
- value to use as a weight for the "fake" neighbor for every island. If numpy.nan,
- will propagate to the final local statistic depending on the `stat_func`. If 0, then
- the lag is always zero for islands.
+ connectivity : scipy.sparse matrix object
+ the connectivity structure describing
+ the relationships between observed units.
+ Need not be row-standardized.
+ permutations : int
+ number of random permutations for calculation of pseudo p_values
+ n_jobs : int
+            Number of cores to be used in the conditional randomization. If -1,
+ all available cores are used.
+ keep_simulations : bool (default True)
+ If ``True``, the entire matrix of replications under the null is stored
+ in memory and accessible; otherwise, replications are not saved
+ seed : int (default None)
+ Seed to ensure reproducibility of conditional randomizations.
+ Must be set here, and not outside of the function, since numba
+ does not correctly interpret external seeds
+ nor numpy.random.RandomState instances.
+ island_weight : int or float (default 0)
+ value to use as a weight for the "fake" neighbor for every island.
+ If ``numpy.nan``, will propagate to the final local statistic depending
+ on the ``stat_func``. If ``0``, then the lag is always zero for islands.
"""
@@ -99,7 +95,9 @@ def fit(self, variables, n_jobs=1, permutations=999):
>>> guerry_ds.loc[(guerry_ds['Donatns'] > 10973), 'donq5'] = 1
>>> guerry_ds.loc[(guerry_ds['Suicids'] > 55564), 'suic5'] = 1
>>> w = libpysal.weights.Queen.from_dataframe(guerry_ds)
- >>> LJC_MV = Local_Join_Counts_MV(connectivity=w).fit([guerry_ds['infq5'], guerry_ds['donq5'], guerry_ds['suic5']])
+    >>> LJC_MV = Join_Counts_Local_MV(
+ ... connectivity=w
+ ... ).fit([guerry_ds['infq5'], guerry_ds['donq5'], guerry_ds['suic5']])
>>> LJC_MV.LJC
>>> LJC_MV.p_sim
"""
@@ -114,10 +112,6 @@ def fit(self, variables, n_jobs=1, permutations=999):
self.variables = np.array(variables, dtype="float")
- keep_simulations = self.keep_simulations
- n_jobs = self.n_jobs
- seed = self.seed
-
# Need to ensure that the product is an
# np.array() of dtype='float' for numba
self.ext = np.array(np.prod(np.vstack(variables), axis=0), dtype="float")
@@ -159,7 +153,7 @@ def _statistic(variables, w):
# focal and neighbor values == 1
focal_all = np.array(np.all(np.dstack(focal) == 1, axis=2))
neighbor_all = np.array(np.all(np.dstack(neighbor) == 1, axis=2))
- MCLC = (focal_all == True) & (neighbor_all == True)
+ MCLC = (focal_all) & (neighbor_all)
# Convert list of True/False to boolean array
# and unlist (necessary for building pd.DF)
MCLC = list(MCLC * 1)
@@ -183,7 +177,6 @@ def _statistic(variables, w):
@_njit(fastmath=True)
def _ljc_mv(i, z, permuted_ids, weights_i, scaling):
- self_weight = weights_i[0]
other_weights = weights_i[1:]
zi, zrand = _prepare_univariate(i, z, permuted_ids, other_weights)
return zi * (zrand @ other_weights)
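A toy sketch of the multivariate variant, mirroring the Guerry doctest above with synthetic binaries (names hypothetical):

    import numpy as np
    import libpysal
    from esda.join_counts_local_mv import Join_Counts_Local_MV

    w = libpysal.weights.lat2W(5, 5)
    rng = np.random.default_rng(0)
    variables = [rng.integers(0, 2, 25) for _ in range(3)]  # three binary variables

    mv = Join_Counts_Local_MV(connectivity=w).fit(variables)
    mv.LJC    # multivariate co-location join counts (the MCLC computed above)
    mv.p_sim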
diff --git a/esda/lee.py b/esda/lee.py
index 8426aa44..d2aeb480 100644
--- a/esda/lee.py
+++ b/esda/lee.py
@@ -1,14 +1,10 @@
import numpy
from scipy import sparse
+from sklearn import preprocessing, utils
from sklearn.base import BaseEstimator
-from sklearn import preprocessing
-from sklearn import utils
-from .crand import (
- crand as _crand_plus,
- njit as _njit,
- _prepare_univariate,
- _prepare_bivariate,
-)
+
+from .crand import _prepare_bivariate
+from .crand import njit as _njit
class Spatial_Pearson(BaseEstimator):
@@ -49,7 +45,7 @@ def fit(self, x, y):
"""
bivariate spatial pearson's R based on Eq. 18 of :cite:`Lee2001`.
- L = \dfrac{Z^T (V^TV) Z}{1^T (V^TV) 1}
+ L = \\dfrac{Z^T (V^TV) Z}{1^T (V^TV) 1}
Parameters
----------
@@ -79,10 +75,6 @@ def fit(self, x, y):
self.connectivity = sparse.eye(Z.shape[0])
self.association_ = self._statistic(Z, self.connectivity)
- standard_connectivity = sparse.csc_matrix(
- self.connectivity / self.connectivity.sum(axis=1)
- )
-
if self.permutations is None:
return self
elif self.permutations < 1:
@@ -154,22 +146,24 @@ def fit(self, x, y):
bivariate local pearson's R based on Eq. 22 in Lee (2001), using
site-wise conditional randomization from Moran_Local_BV.
- L_i = \dfrac{
- n \cdot
- \Big[\big(\sum_i w_{ij}(x_j - \bar{x})\big)
- \big(\sum_i w_{ij}(y_j - \bar{y})\big) \Big]
- }
- {
- \sqrt{\sum_i (x_i - \bar{x})^2}
- \sqrt{\sum_i (y_i - \bar{y})^2}}
- = \dfrac{
- n \cdot
- (\tilde{x}_j - \bar{x})
- (\tilde{y}_j - \bar{y})
- }
- {
- \sqrt{\sum_i (x_i - \bar{x})^2}
- \sqrt{\sum_i (y_i - \bar{y})^2}}
+ .. math::
+
+ L_i = \\dfrac{
+ n \\cdot
+                    \\Big[\\big(\\sum_i w_{ij}(x_j - \\bar{x})\\big)
+                    \\big(\\sum_i w_{ij}(y_j - \\bar{y})\\big) \\Big]
+                  }
+                  {
+                  \\sqrt{\\sum_i (x_i - \\bar{x})^2}
+                  \\sqrt{\\sum_i (y_i - \\bar{y})^2}}
+                = \\dfrac{
+                  n \\cdot
+                  (\\tilde{x}_j - \\bar{x})
+                  (\\tilde{y}_j - \\bar{y})
+                  }
+                  {
+                  \\sqrt{\\sum_i (x_i - \\bar{x})^2}
+                  \\sqrt{\\sum_i (y_i - \\bar{y})^2}}
Lee, Sang Il. (2001), "Developing a bivariate spatial
association measure: An integration of Pearson's r and
@@ -207,7 +201,7 @@ def fit(self, x, y):
max_neighbors = (standard_connectivity != 0).sum(axis=1).max()
random_ids = numpy.array(
[
- numpy.random.permutation(n - 1)[0 : max_neighbors + 1]
+ numpy.random.permutation(n - 1)[0 : max_neighbors + 1] # noqa 208
for i in range(self.permutations)
]
)
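A sketch of the global statistic this file implements, Lee's L from Eq. 18; the column-vector reshaping and the `significance_` attribute are assumptions based on the sklearn-style `fit` signature above:

    import numpy as np
    import libpysal
    from esda.lee import Spatial_Pearson

    w = libpysal.weights.lat2W(10, 10)
    rng = np.random.default_rng(0)
    x = rng.random((100, 1))  # column vectors, per sklearn-style input checking
    y = rng.random((100, 1))

    sp = Spatial_Pearson(connectivity=w.sparse, permutations=999).fit(x, y)
    sp.association_   # Lee's L
    sp.significance_  # permutation pseudo p-value (assumed attribute name)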
diff --git a/esda/losh.py b/esda/losh.py
index 1087d040..181d5bce 100644
--- a/esda/losh.py
+++ b/esda/losh.py
@@ -1,9 +1,9 @@
-import numpy as np
import warnings
-from scipy import sparse
+
+import libpysal as lp
+import numpy as np
from scipy import stats
from sklearn.base import BaseEstimator
-import libpysal as lp
class LOSH(BaseEstimator):
@@ -88,17 +88,21 @@ def fit(self, y, a=2):
if self.inference is None:
return self
- elif self.inference == 'chi-square':
+ elif self.inference == "chi-square":
if a != 2:
- warnings.warn(f'Chi-square inference assumes that a=2, but \
- a={a}. This means the inference will be invalid!')
+ warnings.warn(
+ f"Chi-square inference assumes that a=2, but \
+ a={a}. This means the inference will be invalid!"
+ )
else:
- dof = 2/self.VarHi
- Zi = (2*self.Hi)/self.VarHi
+ dof = 2 / self.VarHi
+ Zi = (2 * self.Hi) / self.VarHi
self.pval = 1 - stats.chi2.cdf(Zi, dof)
else:
- raise NotImplementedError(f'The requested inference method \
- ({self.inference}) is not currently supported!')
+ raise NotImplementedError(
+ f"The requested inference method \
+ ({self.inference}) is not currently supported!"
+ )
return self
@@ -113,22 +117,23 @@ def _statistic(y, w, a):
rowsum = np.array(w.sparse.sum(axis=1)).flatten()
# Calculate spatial mean
- ylag = lp.weights.lag_spatial(w, y)/rowsum
+ ylag = lp.weights.lag_spatial(w, y) / rowsum
# Calculate and adjust residuals based on multiplier
- yresid = abs(y-ylag)**a
+ yresid = abs(y - ylag) ** a
# Calculate denominator of Hi equation
denom = np.mean(yresid) * np.array(rowsum)
# Carry out final Hi calculation
Hi = lp.weights.lag_spatial(w, yresid) / denom
# Calculate average of residuals
yresid_mean = np.mean(yresid)
+
# Calculate VarHi
n = len(y)
squared_rowsum = np.asarray(w.sparse.multiply(w.sparse).sum(axis=1)).flatten()
-
- VarHi = ((n-1)**-1) * \
- (denom**-2) * \
- ((np.sum(yresid**2)/n) - yresid_mean**2) * \
- ((n*squared_rowsum) - (rowsum**2))
+ term1 = ((n - 1) ** -1)
+ term2 = (denom**-2)
+ term3 = ((np.sum(yresid**2) / n) - yresid_mean**2)
+ term4 = ((n * squared_rowsum) - (rowsum**2))
+ VarHi = term1 * term2 * term3 * term4
return (Hi, ylag, yresid, VarHi)
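For orientation, a sketch of the chi-square inference path exercised by the hunks above (toy data; attribute names read from the code):

    import numpy as np
    import libpysal
    from esda.losh import LOSH

    w = libpysal.weights.lat2W(10, 10)
    y = np.random.default_rng(0).random(100)

    losh = LOSH(connectivity=w, inference="chi-square").fit(y)  # default a=2
    losh.Hi    # local spatial heteroskedasticity
    losh.pval  # chi-square p-values; only valid when a=2, per the warning above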
diff --git a/esda/map_comparison.py b/esda/map_comparison.py
index d4f89e1c..6cd3338c 100644
--- a/esda/map_comparison.py
+++ b/esda/map_comparison.py
@@ -1,4 +1,5 @@
-import numpy, pandas
+import numpy
+import pandas
from scipy.special import entr
try:
@@ -27,7 +28,8 @@ def _cast(collection):
Cast a collection to a pygeos geometry array.
"""
try:
- import pygeos, geopandas
+ import geopandas
+ import pygeos
except (ImportError, ModuleNotFoundError) as exception:
raise type(exception)(
"pygeos and geopandas are required for map comparison statistics."
diff --git a/esda/mixture_smoothing.py b/esda/mixture_smoothing.py
index f9da6aff..66a86599 100644
--- a/esda/mixture_smoothing.py
+++ b/esda/mixture_smoothing.py
@@ -6,12 +6,18 @@
in CAMAN R package that is originally written by Peter Schlattmann.
"""
-__author__ = "Myunghwa Hwang , Luc Anselin , Serge Rey , "
+ "Luc Anselin , "
+ "Serge Rey >> mixture = NP_Mixture_Smoother(e,b)
- extracting the smoothed rates through the property r of the NP_Mixture_Smoother instance
+ extracting the smoothed rates through the property
+ r of the NP_Mixture_Smoother instance
>>> mixture.r
array([0.10982278, 0.03445531, 0.11018404, 0.11018604])
@@ -111,20 +118,20 @@ class NP_Mixture_Smoother(object):
>>> mixture.getRateEstimates()
(array([0.0911574, 0.0911574, 0.0911574, 0.0911574]), array([1, 1, 1, 1]))
- """
+ """ # noqa E501
- def __init__(self, e, b, k=50, acc=1.E-7, numiter=5000, limit=0.01):
+ def __init__(self, e, b, k=50, acc=1.0e-7, numiter=5000, limit=0.01):
self.e = np.asarray(e).flatten()
self.b = np.asarray(b).flatten()
self.n = len(e)
- self.w = 1. / self.n
+ self.w = 1.0 / self.n
self.k = k
self.acc = acc
self.numiter = numiter
self.limit = limit
r = self.mixalg()
- self.p = r['p']
- self.t = r['grid']
+ self.p = r["p"]
+ self.t = r["grid"]
self.r, self.category = self.getRateEstimates()
def getSeed(self):
@@ -133,7 +140,7 @@ def getSeed(self):
r_diff = r_max - r_min
step = r_diff / (self.k - 1)
grid = np.arange(r_min, r_max + step, step)
- p = np.ones(self.k) * 1. / self.k
+ p = np.ones(self.k) * 1.0 / self.k
return p, grid
def getMixedProb(self, grid):
@@ -146,11 +153,11 @@ def getMixedProb(self, grid):
def getGradient(self, mix, p):
mix_p = mix * p
mix_den = mix_p.sum(axis=1)
- obs_id = mix_den > 1.E-13
+ obs_id = mix_den > 1.0e-13
for i in range(self.k):
mix_den_len = len(mix_den)
- if (mix_den > 1.E-13).sum() == mix_den_len:
- mix_p[:, i] = (1. / mix_den_len) * mix[:, i] / mix_den
+ if (mix_den > 1.0e-13).sum() == mix_den_len:
+ mix_p[:, i] = (1.0 / mix_den_len) * mix[:, i] / mix_den
gradient = []
for i in range(self.k):
gradient.append(mix_p[:, i][obs_id].sum())
@@ -164,39 +171,44 @@ def getMaxGradient(self, gradient):
return (grad_max, grad_max_inx)
def getMinGradient(self, gradient, p):
- p_fil = p > 1.E-8
+ p_fil = p > 1.0e-8
grad_fil = gradient[p_fil]
grad_min = grad_fil.min()
grad_min_inx = np.where(p_fil)[0][grad_fil.argmin()]
- if grad_min >= 1.E+7:
- return (1.E+7, 1)
+ if grad_min >= 1.0e7:
+ return (1.0e7, 1)
return (grad_min, grad_min_inx)
def getStepsize(self, mix_den, ht):
- mix_den_fil = np.fabs(mix_den) > 1.E-7
+ ##############################################################################
+ # Something seems off in this function
+ # - a & b are defined twice
+ # - 4 defined, but unused, variables (commented out)
+ ##############################################################################
+ mix_den_fil = np.fabs(mix_den) > 1.0e-7
a = ht[mix_den_fil] / mix_den[mix_den_fil]
b = 1.0 + a
- b_fil = np.fabs(b) > 1.E-7
w = self.w
- sl = w * ht[b_fil] / b[b_fil]
- s11 = sl.sum()
- s0 = (w * ht).sum()
+ # b_fil = np.fabs(b) > 1.0e-7
+ # sl = w * ht[b_fil] / b[b_fil]
+ # s11 = sl.sum()
+ # s0 = (w * ht).sum()
- step, oldstep = 0., 0.
+ step, oldstep = 0.0, 0.0
for i in range(50):
- grad1, grad2 = 0., 0.
+ grad1, grad2 = 0.0, 0.0
for j in range(self.n):
a = mix_den[j] + step * ht[j]
- if math.fabs(a) > 1.E-7:
+ if math.fabs(a) > 1.0e-7:
b = ht[j] / a
grad1 = grad1 + w * b
grad2 = grad2 - w * b * b
- if math.fabs(grad2) > 1.E-10:
+ if math.fabs(grad2) > 1.0e-10:
step = step - grad1 / grad2
if oldstep > 1.0 and step > oldstep:
- step = 1.
+ step = 1.0
break
- if grad1 < 1.E-7:
+ if grad1 < 1.0e-7:
break
oldstep = step
if step > 1.0:
@@ -209,36 +221,49 @@ def vem(self, mix, p, grid):
grad, mix_den = self.getGradient(mix, p)
grad_max, grad_max_inx = self.getMaxGradient(grad)
grad_min, grad_min_inx = self.getMinGradient(grad, p)
- ht = (mix[:, grad_max_inx] - mix[:, grad_min_inx]
- ) * p[grad_min_inx]
+ ht = (mix[:, grad_max_inx] - mix[:, grad_min_inx]) * p[grad_min_inx]
st = self.getStepsize(mix_den, ht)
xs = st * p[grad_min_inx]
p[grad_min_inx] = p[grad_min_inx] - xs
p[grad_max_inx] = p[grad_max_inx] + xs
if (grad_max - 1.0) < self.acc or it == (self.numiter - 1):
- res = {'k': self.k, 'accuracy': grad_max - 1.0, 'p': p, 'grid': grid, 'gradient': grad, 'mix_den': mix_den}
+ res = {
+ "k": self.k,
+ "accuracy": grad_max - 1.0,
+ "p": p,
+ "grid": grid,
+ "gradient": grad,
+ "mix_den": mix_den,
+ }
break
return res
def update(self, p, grid):
- p_inx = p > 1.E-3
+ p_inx = p > 1.0e-3
new_p = p[p_inx]
new_grid = grid[p_inx]
self.k = len(new_p)
return new_p, new_grid
def em(self, nstep, grid, p):
- l = self.k - 1
+ l = self.k - 1 # noqa E741
w, n, e, b = self.w, self.n, self.e, self.b
if self.k == 1:
s11 = (w * b / np.ones(n)).sum()
s12 = (w * e / np.ones(n)).sum()
grid[l] = s11 / s12
- p[l] = 1.
+ p[l] = 1.0
mix = self.getMixedProb(grid)
grad, mix_den = self.getGradient(mix, p)
grad_max, grad_max_inx = self.getMaxGradient(grad)
- return {'accuracy': math.fabs(grad_max - 1), 'k': self.k, 'p': p, 'grid': grid, 'gradient': grad, 'mix_den': mix_den}
+ return {
+ "accuracy": math.fabs(grad_max - 1),
+ "k": self.k,
+ "p": p,
+ "grid": grid,
+ "gradient": grad,
+ "mix_den": mix_den,
+ }
else:
res = {}
for counter in range(nstep):
@@ -246,17 +271,28 @@ def em(self, nstep, grid, p):
grad, mix_den = self.getGradient(mix, p)
p = p * grad
su = p[:-1].sum()
- p[l] = 1. - su
+ p[l] = 1.0 - su
for j in range(self.k):
- mix_den_fil = mix_den > 1.E-10
+ mix_den_fil = mix_den > 1.0e-10
f_len = len(mix_den_fil)
- s11 = (w * e[mix_den_fil] / np.ones(f_len) * mix[mix_den_fil, j] / mix_den[mix_den_fil]).sum()
- s12 = (w * b[mix_den_fil] * (mix[mix_den_fil, j] / np.ones(f_len)) / mix_den[mix_den_fil]).sum()
- if s12 > 1.E-12:
+ ones = np.ones(f_len)
+ mdf_j = mix[mix_den_fil, j]
+ _mdf = mix_den[mix_den_fil]
+ s11 = (w * e[mix_den_fil] / ones * mdf_j / _mdf).sum()
+ s12 = (w * b[mix_den_fil] / ones * mdf_j / _mdf).sum()
+ if s12 > 1.0e-12:
grid[j] = s11 / s12
grad_max, grad_max_inx = self.getMaxGradient(grad)
- res = {'accuracy': math.fabs(grad_max - 1.), 'step': counter + 1, 'k': self.k, 'p': p, 'grid': grid, 'gradient': grad, 'mix_den': mix_den}
- if res['accuracy'] < self.acc and counter > 10:
+ res = {
+ "accuracy": math.fabs(grad_max - 1.0),
+ "step": counter + 1,
+ "k": self.k,
+ "p": p,
+ "grid": grid,
+ "gradient": grad,
+ "mix_den": mix_den,
+ }
+ if res["accuracy"] < self.acc and counter > 10:
break
return res
@@ -266,7 +302,7 @@ def getLikelihood(self, mix_den):
return r
def combine(self, res):
- p, grid, k = res['p'], res['grid'], self.k
+ p, grid, k = res["p"], res["grid"], self.k
diff = np.fabs(grid[:-1] - grid[1:])
bp_seeds = (diff >= self.limit).nonzero()[0] + 1
if k - len(bp_seeds) > 1:
@@ -279,26 +315,31 @@ def combine(self, res):
bp.append(bp_seeds[0])
for i in range(1, len(bp_seeds)):
if bp_seeds[i] - bp_seeds[i - 1] > 1:
- bp.append(a[i])
+ ##############################################################
+ # NEEDS ATTENTION
+ # `a` is not defined anywhere... what is it?
+ # -- seems like the condition is never met
+ # (bp_seeds[i] - bp_seeds[i - 1] > 1)
+ bp.append(a[i]) # noqa F821
+ ##############################################################
new_grid, new_p = [], []
for i in range(len(bp) - 1):
new_grid.append(grid[bp[i]])
- new_p.append(p[bp[i]:bp[i + 1]].sum())
- self.k = new_k = len(new_p)
+ new_p.append(p[bp[i] : bp[i + 1]].sum()) # noqa E203
+ self.k = len(new_p)
new_grid, new_p = np.array(new_grid), np.array(new_p)
mix = self.getMixedProb(new_grid)
grad, mix_den = self.getGradient(mix, new_p)
res = self.em(1, new_grid, new_p)
if res is not None:
- res['likelihood'] = self.getLikelihood(mix_den)
+ res["likelihood"] = self.getLikelihood(mix_den)
return res
def mixalg(self):
- e, b, k, n = self.e, self.b, self.k, self.n
p, grid = self.getSeed()
mix = self.getMixedProb(grid)
vem_res = self.vem(mix, p, grid)
- p, grid, k = vem_res['p'], vem_res['grid'], vem_res['k']
+ p, grid = vem_res["p"], vem_res["grid"]
n_p, n_g = self.update(p, grid)
em_res = self.em(self.numiter, n_g, n_p)
com_res = self.combine(em_res)
diff --git a/esda/moran.py b/esda/moran.py
index 5bc85316..6308664a 100644
--- a/esda/moran.py
+++ b/esda/moran.py
@@ -2,23 +2,24 @@
Moran's I Spatial Autocorrelation Statistics
"""
-__author__ = "Sergio J. Rey , \
- Dani Arribas-Bel , \
- Levi John Wolf "
-from libpysal.weights.spatial_lag import lag_spatial as slag
-from .smoothing import assuncao_rate
-from .tabular import _univariate_handler, _bivariate_handler
-from .crand import (
- crand as _crand_plus,
- njit as _njit,
- _prepare_univariate,
- _prepare_bivariate,
+__author__ = (
+ "Sergio J. Rey , "
+ "Dani Arribas-Bel , "
+ "Levi John Wolf "
)
-from warnings import warn, simplefilter
-from scipy import sparse
-import scipy.stats as stats
+
+from warnings import simplefilter
+
import numpy as np
-import tempfile
+import scipy.stats as stats
+from libpysal.weights.spatial_lag import lag_spatial as slag
+from scipy import sparse
+
+from .crand import _prepare_univariate
+from .crand import crand as _crand_plus
+from .crand import njit as _njit
+from .smoothing import assuncao_rate
+from .tabular import _bivariate_handler, _univariate_handler
__all__ = [
"Moran",
@@ -162,7 +163,7 @@ def __init__(
self.w = w
self.permutations = permutations
self.__moments()
- self.I = self.__calc(self.z)
+ self.I = self.__calc(self.z) # noqa E741
self.z_norm = (self.I - self.EI) / self.seI_norm
self.z_rand = (self.I - self.EI) / self.seI_rand
@@ -189,7 +190,7 @@ def __init__(
self.p_sim = (larger + 1.0) / (permutations + 1.0)
self.EI_sim = sim.sum() / permutations
self.seI_sim = np.array(sim).std()
- self.VI_sim = self.seI_sim ** 2
+ self.VI_sim = self.seI_sim**2
self.z_sim = (self.I - self.EI_sim) / self.seI_sim
if self.z_sim > 0:
self.p_z_sim = 1 - stats.norm.cdf(self.z_sim)
@@ -219,8 +220,8 @@ def __moments(self):
self.seI_norm = self.VI_norm ** (1 / 2.0)
# variance under randomization
- xd4 = z ** 4
- xd2 = z ** 2
+ xd4 = z**4
+ xd2 = z**2
k_num = xd4.sum() / n
k_den = (xd2.sum() / n) ** 2
k = k_num / k_den
@@ -250,32 +251,32 @@ def by_col(
Parameters
----------
- df : pandas.DataFrame
- a pandas dataframe with a geometry column
- cols : string or list of string
- name or list of names of columns to use to compute the statistic
- w : pysal weights object
- a weights object aligned with the dataframe. If not provided, this
- is searched for in the dataframe's metadata
- inplace : bool
- a boolean denoting whether to operate on the dataframe inplace or to
- return a series contaning the results of the computation. If
- operating inplace, the derived columns will be named
- 'column_moran'
- pvalue : string
- a string denoting which pvalue should be returned. Refer to the
- the Moran statistic's documentation for available p-values
- outvals : list of strings
- list of arbitrary attributes to return as columns from the
- Moran statistic
- **stat_kws : keyword arguments
- options to pass to the underlying statistic. For this, see the
- documentation for the Moran statistic.
+ df : pandas.DataFrame
+ a pandas dataframe with a geometry column
+ cols : string or list of string
+ name or list of names of columns to use to compute the statistic
+ w : pysal weights object
+ a weights object aligned with the dataframe. If not provided, this
+ is searched for in the dataframe's metadata
+ inplace : bool
+ a boolean denoting whether to operate on the dataframe inplace or to
+        return a series containing the results of the computation. If
+ operating inplace, the derived columns will be named 'column_moran'
+ pvalue : string
+        a string denoting which pvalue should be returned. Refer to
+        the Moran statistic's documentation for available p-values
+ outvals : list of strings
+ list of arbitrary attributes to return as columns from the
+ Moran statistic
+ **stat_kws : dict
+ options to pass to the underlying statistic. For this, see the
+ documentation for the Moran statistic.
Returns
--------
- If inplace, None, and operation is conducted on dataframe in memory. Otherwise,
- returns a copy of the dataframe with the relevant columns attached.
+ If inplace, None, and operation is conducted on dataframe
+ in memory. Otherwise, returns a copy of the dataframe with
+ the relevant columns attached.
"""
return _univariate_handler(
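A minimal sketch of the global Moran's I workflow whose internals the hunks above touch (toy lattice data):

    import numpy as np
    import libpysal
    from esda.moran import Moran

    w = libpysal.weights.lat2W(10, 10)
    y = np.random.default_rng(0).random(100)

    mi = Moran(y, w, permutations=999)
    mi.I      # observed global Moran's I
    mi.EI     # expectation under the null, -1/(n-1)
    mi.p_sim  # pseudo p-value from the permutation distribution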
@@ -411,7 +412,7 @@ def __init__(self, x, y, w, transformation="r", permutations=PERMUTATIONS):
self.den = n - 1.0 # zx'zx = zy'zy = n-1
w.transform = transformation
self.w = w
- self.I = self.__calc(zy)
+ self.I = self.__calc(zy) # noqa E741
if permutations:
nrp = np.random.permutation
sim = [self.__calc(nrp(zy)) for i in range(permutations)]
@@ -423,7 +424,7 @@ def __init__(self, x, y, w, transformation="r", permutations=PERMUTATIONS):
self.p_sim = (larger + 1.0) / (permutations + 1.0)
self.EI_sim = sim.sum() / permutations
self.seI_sim = np.array(sim).std()
- self.VI_sim = self.seI_sim ** 2
+ self.VI_sim = self.seI_sim**2
self.z_sim = (self.I - self.EI_sim) / self.seI_sim
if self.z_sim > 0:
self.p_z_sim = 1 - stats.norm.cdf(self.z_sim)
@@ -457,39 +458,39 @@ def by_col(
Parameters
----------
- df : pandas.DataFrame
- a pandas dataframe with a geometry column
- X : list of strings
- column name or list of column names to use as X values to compute
- the bivariate statistic. If no Y is provided, pairwise comparisons
- among these variates are used instead.
- Y : list of strings
- column name or list of column names to use as Y values to compute
- the bivariate statistic. if no Y is provided, pariwise comparisons
- among the X variates are used instead.
- w : pysal weights object
- a weights object aligned with the dataframe. If not provided, this
- is searched for in the dataframe's metadata
- inplace : bool
- a boolean denoting whether to operate on the dataframe inplace or to
- return a series contaning the results of the computation. If
- operating inplace, the derived columns will be named
- 'column_moran_local'
- pvalue : string
- a string denoting which pvalue should be returned. Refer to the
- the Moran_BV statistic's documentation for available p-values
- outvals : list of strings
- list of arbitrary attributes to return as columns from the
- Moran_BV statistic
- **stat_kws : keyword arguments
- options to pass to the underlying statistic. For this, see the
- documentation for the Moran_BV statistic.
-
+ df : pandas.DataFrame
+ a pandas dataframe with a geometry column
+ X : list of strings
+ column name or list of column names to use as X values to compute
+ the bivariate statistic. If no Y is provided, pairwise comparisons
+ among these variates are used instead.
+ Y : list of strings
+ column name or list of column names to use as Y values to compute
+        the bivariate statistic. If no Y is provided, pairwise comparisons
+ among the X variates are used instead.
+ w : pysal weights object
+ a weights object aligned with the dataframe. If not provided, this
+ is searched for in the dataframe's metadata
+ inplace : bool
+ a boolean denoting whether to operate on the dataframe inplace or to
+        return a series containing the results of the computation. If
+ operating inplace, the derived columns will be named
+ 'column_moran_local'
+ pvalue : string
+        a string denoting which pvalue should be returned. Refer to
+        the Moran_BV statistic's documentation for available p-values
+ outvals : list of strings
+ list of arbitrary attributes to return as columns from the
+ Moran_BV statistic
+    **stat_kws : dict
+ options to pass to the underlying statistic. For this, see the
+ documentation for the Moran_BV statistic.
Returns
--------
- If inplace, None, and operation is conducted on dataframe in memory. Otherwise,
- returns a copy of the dataframe with the relevant columns attached.
+ If inplace, None, and operation is conducted on dataframe
+ in memory. Otherwise, returns a copy of the dataframe with
+ the relevant columns attached.
"""
return _bivariate_handler(
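And the bivariate counterpart, which relates x to the spatial lag of y (toy data):

    import numpy as np
    import libpysal
    from esda.moran import Moran_BV

    w = libpysal.weights.lat2W(10, 10)
    rng = np.random.default_rng(0)
    x, y = rng.random(100), rng.random(100)

    mbv = Moran_BV(x, y, w, permutations=999)
    mbv.I      # bivariate Moran's I
    mbv.p_sim  # permutation pseudo p-value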
@@ -587,10 +588,11 @@ def _Moran_BV_Matrix_array(variables, w, permutations=0, varnames=None):
"""
Base calculation for MORAN_BV_Matrix
"""
+
+ k = len(variables)
if varnames is None:
varnames = ["x{}".format(i) for i in range(k)]
- k = len(variables)
rk = list(range(0, k - 1))
results = {}
for i in rk:
@@ -753,38 +755,39 @@ def by_col(
Parameters
----------
- df : pandas.DataFrame
- a pandas dataframe with a geometry column
- events : string or list of strings
- one or more names where events are stored
- populations : string or list of strings
- one or more names where the populations corresponding to the
- events are stored. If one population column is provided, it is
- used for all event columns. If more than one population column
- is provided but there is not a population for every event
- column, an exception will be raised.
- w : pysal weights object
- a weights object aligned with the dataframe. If not provided, this
- is searched for in the dataframe's metadata
- inplace : bool
- a boolean denoting whether to operate on the dataframe inplace or to
- return a series contaning the results of the computation. If
- operating inplace, the derived columns will be named
- 'column_moran_rate'
- pvalue : string
- a string denoting which pvalue should be returned. Refer to the
- the Moran_Rate statistic's documentation for available p-values
- outvals : list of strings
- list of arbitrary attributes to return as columns from the
- Moran_Rate statistic
- **stat_kws : keyword arguments
- options to pass to the underlying statistic. For this, see the
- documentation for the Moran_Rate statistic.
+ df : pandas.DataFrame
+ a pandas dataframe with a geometry column
+ events : string or list of strings
+ one or more names where events are stored
+ populations : string or list of strings
+ one or more names where the populations corresponding to the
+ events are stored. If one population column is provided, it is
+ used for all event columns. If more than one population column
+ is provided but there is not a population for every event
+ column, an exception will be raised.
+ w : pysal weights object
+ a weights object aligned with the dataframe. If not provided, this
+ is searched for in the dataframe's metadata
+ inplace : bool
+ a boolean denoting whether to operate on the dataframe inplace or to
+        return a series containing the results of the computation. If
+ operating inplace, the derived columns will be named
+ 'column_moran_rate'
+ pvalue : string
+        a string denoting which pvalue should be returned. Refer to
+        the Moran_Rate statistic's documentation for available p-values
+ outvals : list of strings
+ list of arbitrary attributes to return as columns from the
+ Moran_Rate statistic
+    **stat_kws : dict
+ options to pass to the underlying statistic. For this, see the
+ documentation for the Moran_Rate statistic.
Returns
--------
- If inplace, None, and operation is conducted on dataframe in memory. Otherwise,
- returns a copy of the dataframe with the relevant columns attached.
+ If inplace, None, and operation is conducted on dataframe
+ in memory. Otherwise, returns a copy of the dataframe with
+ the relevant columns attached.
"""
if not inplace:
@@ -884,11 +887,12 @@ class Moran_Local(object):
seed : None/int
Seed to ensure reproducibility of conditional randomizations.
Must be set here, and not outside of the function, since numba
- does not correctly interpret external seeds nor numpy.random.RandomState instances.
+ does not correctly interpret external seeds nor
+ numpy.random.RandomState instances.
island_weight:
- value to use as a weight for the "fake" neighbor for every island. If numpy.nan,
- will propagate to the final local statistic depending on the `stat_func`. If 0, then
- the lag is always zero for islands.
+ value to use as a weight for the "fake" neighbor for every island.
+ If numpy.nan, will propagate to the final local statistic depending
+ on the `stat_func`. If 0, then the lag is always zero for islands.
Attributes
----------
@@ -1076,12 +1080,9 @@ def __quads(self):
np = (1 - zp) * lp
nn = (1 - zp) * (1 - lp)
pn = zp * (1 - lp)
- self.q = (
- self.quads[0] * pp
- + self.quads[1] * np
- + self.quads[2] * nn
- + self.quads[3] * pn
- )
+
+ q0, q1, q2, q3 = self.quads
+ self.q = (q0 * pp) + (q1 * np) + (q2 * nn) + (q3 * pn)
def __moments(self):
W = self.w.sparse
@@ -1091,40 +1092,43 @@ def __moments(self):
m2 = (z * z).sum() / n
wi = np.asarray(W.sum(axis=1)).flatten()
wi2 = np.asarray(W.multiply(W).sum(axis=1)).flatten()
+
# ---------------------------------------------------------
# Conditional randomization null, Sokal 1998, Eqs. A7 & A8
# assume that division is as written, so that
# a - b / (n - 1) means a - (b / (n-1))
# ---------------------------------------------------------
- expectation = -(z ** 2 * wi) / ((n - 1) * m2)
- variance = (
- (z / m2) ** 2
- * (n / (n - 2))
- * (wi2 - (wi ** 2 / (n - 1)))
- * (m2 - (z ** 2 / (n - 1)))
- )
+ expectation = -(z**2 * wi) / ((n - 1) * m2)
+ var_term1 = (z / m2) ** 2
+ var_term2 = (n / (n - 2))
+ var_term3 = (wi2 - (wi**2 / (n - 1)))
+ var_term4 = (m2 - (z**2 / (n - 1)))
+ variance = var_term1 * var_term2 * var_term3 * var_term4
self.EIc = expectation
self.VIc = variance
+
# ---------------------------------------------------------
# Total randomization null, Sokal 1998, Eqs. A3 & A4*
# ---------------------------------------------------------
- m4 = z ** 4 / n
- b2 = m4 / m2 ** 2
+ m4 = z**4 / n
+ b2 = m4 / m2**2
expectation = -wi / (n - 1)
+
# assume that "avoiding identical subscripts" in :cite:`Anselin1995`
- # includes i==h and i==k, we can use the form due to :cite:`sokal1998local` below.
+ # includes i==h and i==k, we can use the form due to
+ # :cite:`sokal1998local` below.
+
# wikh = _wikh_fast(W)
# variance_anselin = (wi2 * (n - b2)/(n-1)
# + 2*wikh*(2*b2 - n) / ((n-1)*(n-2))
# - wi**2/(n-1)**2)
self.EI = expectation
- self.VI = (
- wi2 * (n - b2) / (n - 1)
- + (wi ** 2 - wi2) * (2 * b2 - n) / ((n - 1) * (n - 2))
- - (-wi / (n - 1)) ** 2
- )
+ n1 = n - 1
+ self.VI = wi2 * (n - b2) / n1
+ self.VI += (wi**2 - wi2) * (2 * b2 - n) / (n1 * (n - 2))
+ self.VI -= (-wi / n1) ** 2
@property
def _statistic(self):
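For reference, the conditional-randomization moments computed by the rewritten `__moments` body above, written out in the code's notation (m_2 = \sum_i z_i^2 / n, w_{i.} = \sum_j w_{ij}, w^{(2)}_{i.} = \sum_j w_{ij}^2):

    E[I_i] = -\frac{z_i^2 \, w_{i\cdot}}{(n - 1)\, m_2}
    \qquad
    \operatorname{Var}[I_i] =
        \left(\frac{z_i}{m_2}\right)^{2}
        \frac{n}{n - 2}
        \left( w^{(2)}_{i\cdot} - \frac{w_{i\cdot}^{2}}{n - 1} \right)
        \left( m_2 - \frac{z_i^{2}}{n - 1} \right)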
@@ -1140,32 +1144,33 @@ def by_col(
Parameters
----------
- df : pandas.DataFrame
- a pandas dataframe with a geometry column
- cols : string or list of string
- name or list of names of columns to use to compute the statistic
- w : pysal weights object
- a weights object aligned with the dataframe. If not provided, this
- is searched for in the dataframe's metadata
- inplace : bool
- a boolean denoting whether to operate on the dataframe inplace or to
- return a series contaning the results of the computation. If
- operating inplace, the derived columns will be named
- 'column_moran_local'
- pvalue : string
- a string denoting which pvalue should be returned. Refer to the
- the Moran_Local statistic's documentation for available p-values
- outvals : list of strings
- list of arbitrary attributes to return as columns from the
- Moran_Local statistic
- **stat_kws : keyword arguments
- options to pass to the underlying statistic. For this, see the
- documentation for the Moran_Local statistic.
+ df : pandas.DataFrame
+ a pandas dataframe with a geometry column
+ cols : string or list of string
+ name or list of names of columns to use to compute the statistic
+ w : pysal weights object
+ a weights object aligned with the dataframe. If not provided, this
+ is searched for in the dataframe's metadata
+ inplace : bool
+ a boolean denoting whether to operate on the dataframe inplace or to
+        return a series containing the results of the computation. If
+ operating inplace, the derived columns will be named
+ 'column_moran_local'
+ pvalue : string
+        a string denoting which pvalue should be returned. Refer to
+        the Moran_Local statistic's documentation for available p-values
+ outvals : list of strings
+ list of arbitrary attributes to return as columns from the
+ Moran_Local statistic
+ **stat_kws : dict
+ options to pass to the underlying statistic. For this, see the
+ documentation for the Moran_Local statistic.
Returns
--------
- If inplace, None, and operation is conducted on dataframe in memory. Otherwise,
- returns a copy of the dataframe with the relevant columns attached.
+ If inplace, None, and operation is conducted on dataframe
+ in memory. Otherwise, returns a copy of the dataframe with
+ the relevant columns attached.
"""
return _univariate_handler(
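A short sketch of the local Moran workflow, including the `seed` argument discussed in the docstring hunks above (toy data):

    import numpy as np
    import libpysal
    from esda.moran import Moran_Local

    w = libpysal.weights.lat2W(10, 10)
    y = np.random.default_rng(0).random(100)

    lisa = Moran_Local(y, w, permutations=999, seed=12345)
    lisa.Is     # local Moran's I, one value per observation
    lisa.q      # quadrant codes (1 HH, 2 LH, 3 LL, 4 HL by default)
    lisa.p_sim  # conditional-permutation pseudo p-values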
@@ -1217,11 +1222,12 @@ class Moran_Local_BV(object):
seed : None/int
Seed to ensure reproducibility of conditional randomizations.
Must be set here, and not outside of the function, since numba
- does not correctly interpret external seeds nor numpy.random.RandomState instances.
+ does not correctly interpret external seeds nor
+ numpy.random.RandomState instances.
island_weight:
- value to use as a weight for the "fake" neighbor for every island. If numpy.nan,
- will propagate to the final local statistic depending on the `stat_func`. If 0, then
- the lag is always zero for islands.
+ value to use as a weight for the "fake" neighbor for every island.
+ If numpy.nan, will propagate to the final local statistic depending
+ on the `stat_func`. If 0, then the lag is always zero for islands.
Attributes
----------
@@ -1373,12 +1379,9 @@ def __quads(self):
np = (1 - zp) * lp
nn = (1 - zp) * (1 - lp)
pn = zp * (1 - lp)
- self.q = (
- self.quads[0] * pp
- + self.quads[1] * np
- + self.quads[2] * nn
- + self.quads[3] * pn
- )
+
+ q0, q1, q2, q3 = self.quads
+ self.q = (q0 * pp) + (q1 * np) + (q2 * nn) + (q3 * pn)
@property
def _statistic(self):
@@ -1402,39 +1405,39 @@ def by_col(
Parameters
----------
- df : pandas.DataFrame
- a pandas dataframe with a geometry column
- X : list of strings
- column name or list of column names to use as X values to compute
- the bivariate statistic. If no Y is provided, pairwise comparisons
- among these variates are used instead.
- Y : list of strings
- column name or list of column names to use as Y values to compute
- the bivariate statistic. if no Y is provided, pariwise comparisons
- among the X variates are used instead.
- w : pysal weights object
- a weights object aligned with the dataframe. If not provided, this
- is searched for in the dataframe's metadata
- inplace : bool
- a boolean denoting whether to operate on the dataframe inplace or to
- return a series contaning the results of the computation. If
- operating inplace, the derived columns will be named
- 'column_moran_local_bv'
- pvalue : string
- a string denoting which pvalue should be returned. Refer to the
- the Moran_Local_BV statistic's documentation for available p-values
- outvals : list of strings
- list of arbitrary attributes to return as columns from the
- Moran_Local_BV statistic
- **stat_kws : keyword arguments
- options to pass to the underlying statistic. For this, see the
- documentation for the Moran_Local_BV statistic.
-
+ df : pandas.DataFrame
+ a pandas dataframe with a geometry column
+ X : list of strings
+ column name or list of column names to use as X values to compute
+ the bivariate statistic. If no Y is provided, pairwise comparisons
+ among these variates are used instead.
+ Y : list of strings
+ column name or list of column names to use as Y values to compute
+        the bivariate statistic. If no Y is provided, pairwise comparisons
+ among the X variates are used instead.
+ w : pysal weights object
+ a weights object aligned with the dataframe. If not provided, this
+ is searched for in the dataframe's metadata
+ inplace : bool
+ a boolean denoting whether to operate on the dataframe inplace or to
+        return a series containing the results of the computation. If
+ operating inplace, the derived columns will be named
+ 'column_moran_local_bv'
+ pvalue : string
+        a string denoting which pvalue should be returned. Refer to
+        the Moran_Local_BV statistic's documentation for available p-values
+ outvals : list of strings
+ list of arbitrary attributes to return as columns from the
+ Moran_Local_BV statistic
+ **stat_kws : dict
+ options to pass to the underlying statistic. For this, see the
+ documentation for the Moran_Local_BV statistic.
Returns
--------
- If inplace, None, and operation is conducted on dataframe in memory. Otherwise,
- returns a copy of the dataframe with the relevant columns attached.
+ If inplace, None, and operation is conducted on dataframe
+ in memory. Otherwise, returns a copy of the dataframe with
+ the relevant columns attached.
"""
return _bivariate_handler(
@@ -1492,9 +1495,9 @@ class Moran_Local_Rate(Moran_Local):
Must be set here, and not outside of the function, since numba does not
correctly interpret external seeds nor numpy.random.RandomState instances.
island_weight: float
- value to use as a weight for the "fake" neighbor for every island. If numpy.nan,
- will propagate to the final local statistic depending on the `stat_func`. If 0, then
- the lag is always zero for islands.
+ value to use as a weight for the "fake" neighbor for every island.
+ If numpy.nan, will propagate to the final local statistic depending
+ on the `stat_func`. If 0, then the lag is always zero for islands.
Attributes
----------
@@ -1550,16 +1553,19 @@ class Moran_Local_Rate(Moran_Local):
>>> e = np.array(f.by_col('SID79'))
>>> b = np.array(f.by_col('BIR79'))
>>> from esda.moran import Moran_Local_Rate
- >>> lm = Moran_Local_Rate(e, b, w, transformation = "r", permutations = 99)
+ >>> lm = Moran_Local_Rate(e, b, w, transformation="r", permutations=99)
>>> lm.q[:10]
array([2, 4, 3, 1, 2, 1, 1, 4, 2, 4])
- >>> lm = Moran_Local_Rate(e, b, w, transformation = "r", permutations = 99, geoda_quads=True)
+ >>> lm = Moran_Local_Rate(
+    ...     e, b, w, transformation="r", permutations=99, geoda_quads=True
+    ... )
>>> lm.q[:10]
array([3, 4, 2, 1, 3, 1, 1, 4, 3, 4])
Note random components result is slightly different values across
architectures so the results have been removed from doctests and will be
moved into unittests that are conditional on architectures
+
"""
def __init__(
@@ -1612,37 +1618,39 @@ def by_col(
Parameters
----------
- df : pandas.DataFrame
- a pandas dataframe with a geometry column
- events : string or list of strings
- one or more names where events are stored
- populations : string or list of strings
- one or more names where the populations corresponding to the
- events are stored. If one population column is provided, it is
- used for all event columns. If more than one population column
- is provided but there is not a population for every event
- column, an exception will be raised.
- w : pysal weights object
- a weights object aligned with the dataframe. If not provided, this
- is searched for in the dataframe's metadata
- inplace : bool
- a boolean denoting whether to operate on the dataframe inplace or to
- return a series contaning the results of the computation. If
- operating inplace, the derived columns will be named 'column_moran_local_rate'
- pvalue : string
- a string denoting which pvalue should be returned. Refer to the
- the Moran_Local_Rate statistic's documentation for available p-values
- outvals : list of strings
- list of arbitrary attributes to return as columns from the
- Moran_Local_Rate statistic
- **stat_kws : keyword arguments
- options to pass to the underlying statistic. For this, see the
- documentation for the Moran_Local_Rate statistic.
+ df : pandas.DataFrame
+ a pandas dataframe with a geometry column
+ events : string or list of strings
+ one or more names where events are stored
+ populations : string or list of strings
+ one or more names where the populations corresponding to the
+ events are stored. If one population column is provided, it is
+ used for all event columns. If more than one population column
+ is provided but there is not a population for every event
+ column, an exception will be raised.
+ w : pysal weights object
+ a weights object aligned with the dataframe. If not provided, this
+ is searched for in the dataframe's metadata
+ inplace : bool
+ a boolean denoting whether to operate on the dataframe
+        inplace or to return a series containing the results of
+ the computation. If operating inplace, the derived columns
+ will be named 'column_moran_local_rate'
+ pvalue : string
+        a string denoting which pvalue should be returned. Refer to
+        the Moran_Local_Rate statistic's documentation for available p-values
+ outvals : list of strings
+ list of arbitrary attributes to return as columns from the
+ Moran_Local_Rate statistic
+ **stat_kws : dict
+ options to pass to the underlying statistic. For this, see the
+ documentation for the Moran_Local_Rate statistic.
Returns
--------
- If inplace, None, and operation is conducted on dataframe in memory. Otherwise,
- returns a copy of the dataframe with the relevant columns attached.
+ If inplace, None, and operation is conducted on dataframe
+ in memory. Otherwise, returns a copy of the dataframe with
+ the relevant columns attached.
"""
if not inplace:
@@ -1711,7 +1719,9 @@ def _wikh_fast(W, sokal_correction=False):
"""
This computes the outer product of weights for each observation.
- w_{i(kh)} = \sum_{k \neq i}^n \sum_{h \neq i}^n w_ik * w_hk
+ .. math::
+
+        w_{i(kh)} = \\sum_{k \\neq i}^n \\sum_{h \\neq i}^n w_{ik} w_{hk}
If the :cite:`sokal1998local` version is used, then we also have h \neq k
Since this version introduces a simplification in the expression
@@ -1720,14 +1730,14 @@ def _wikh_fast(W, sokal_correction=False):
Arguments
---------
- W : scipy sparse matrix
- a sparse matrix describing the spatial relationships
- between observations.
- sokal_correction: bool
- Whether to avoid self-neighbors in the summation of weights.
- If False (default), then the outer product of all weights
- for observation i are used, regardless if they are of the form
- w_hh or w_kk.
+ W : scipy sparse matrix
+ a sparse matrix describing the spatial relationships
+ between observations.
+ sokal_correction : bool
+ Whether to avoid self-neighbors in the summation of weights.
+ If False (default), then the outer product of all weights
+ for observation i are used, regardless if they are of the form
+ w_hh or w_kk.
Returns
-------
diff --git a/esda/shape.py b/esda/shape.py
index 3f103a77..c45ee5b7 100644
--- a/esda/shape.py
+++ b/esda/shape.py
@@ -15,7 +15,8 @@ def _cast(collection):
Cast a collection to a pygeos geometry array.
"""
try:
- import pygeos, geopandas
+ import geopandas
+ import pygeos
except (ImportError, ModuleNotFoundError) as exception:
raise type(exception)("pygeos and geopandas are required for shape statistics.")
@@ -66,7 +67,6 @@ def get_angles(collection, return_indices=False):
exploded = pygeos.get_parts(ga)
coords = pygeos.get_coordinates(exploded)
n_coords_per_geom = pygeos.get_num_coordinates(exploded)
- n_parts_per_geom = pygeos.get_num_geometries(exploded)
angles = numpy.asarray(_get_angles(coords, n_coords_per_geom))
if return_indices:
return angles, numpy.repeat(
@@ -85,11 +85,9 @@ def _get_angles(points, n_coords_per_geom):
"""
# Start at the first point of the first geometry
offset = int(0)
- start = points[0]
on_geom = 0
on_coord = 0
result = []
- n_points = len(points)
while True:
# if we're on the last point before the closure point,
if on_coord == (n_coords_per_geom[on_geom] - 1):
@@ -125,7 +123,7 @@ def _get_angles(points, n_coords_per_geom):
def isoperimetric_quotient(collection):
- """
+ r"""
The Isoperimetric quotient, defined as the ratio of a polygon's area to the
area of the equi-perimeter circle.
@@ -206,7 +204,7 @@ def diameter_ratio(collection, rotated=True):
if rotated:
box = pygeos.minimum_rotated_rectangle(ga)
coords = pygeos.get_coordinates(box)
- a, b, c, d = (coords[0::5], coords[1::5], coords[2::5], coords[3::5])
+ a, b, _, d = (coords[0::5], coords[1::5], coords[2::5], coords[3::5])
widths = numpy.sqrt(numpy.sum((a - b) ** 2, axis=1))
heights = numpy.sqrt(numpy.sum((a - d) ** 2, axis=1))
else:
@@ -264,9 +262,12 @@ def convex_hull_ratio(collection):
def fractal_dimension(collection, support="hex"):
"""
- The fractal dimension of the boundary of a shape, assuming a given spatial support for the geometries.
+ The fractal dimension of the boundary of a shape, assuming a given
+ spatial support for the geometries.
- Note that this derivation assumes a specific ideal spatial support for the polygon, and is thus may not return valid results for complex or highly irregular geometries.
+ Note that this derivation assumes a specific ideal spatial support
+    for the polygon, and thus may not return valid results for
+ complex or highly irregular geometries.
"""
ga = _cast(collection)
P = pygeos.measurement.length(ga)
@@ -279,7 +280,8 @@ def fractal_dimension(collection, support="hex"):
return 2 * numpy.log(P / (2 * numpy.pi)) / numpy.log(A / numpy.pi)
else:
raise ValueError(
- f"The support argument must be one of 'hex', 'circle', or 'square', but {support} was provided."
+ "The support argument must be one of 'hex', 'circle', or 'square', "
+ f"but {support} was provided."
)
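A toy sketch of the support argument handled above; the rectangles are illustrative, the returned values depend on the support chosen, and pygeos plus geopandas are required, per `_cast`:

    import geopandas
    from shapely.geometry import box
    from esda.shape import fractal_dimension

    gdf = geopandas.GeoDataFrame(geometry=[box(0, 0, 2, 2), box(0, 0, 4, 1)])
    fractal_dimension(gdf.geometry, support="hex")     # default support
    fractal_dimension(gdf.geometry, support="square")  # boundary dimension vs. a square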
@@ -315,14 +317,16 @@ def squareness(collection):
def rectangularity(collection):
"""
- Ratio of the area of the shape to the area of its minimum bounding rotated rectangle
+ Ratio of the area of the shape to the area
+ of its minimum bounding rotated rectangle
Reveals a polygon’s degree of being curved inward.
.. math::
\\frac{A}{A_{MBR}}
- where :math:`A` is the area and :math:`A_{MBR}` is the area of minimum bounding
+ where :math:`A` is the area and :math:`A_{MBR}`
+ is the area of minimum bounding
rotated rectangle.
Notes
@@ -414,7 +418,8 @@ def moment_of_inertia(collection):
Thus, for constant unit mass at each boundary point,
the MoI of this pointcloud is
- \sum_i d_{i,c}^2
+ .. math::
+ \\sum_i d_{i,c}^2
where c is the centroid of the polygon
@@ -441,7 +446,7 @@ def moa_ratio(collection):
"""
ga = _cast(collection)
r = pygeos.measurement.length(ga) / (2 * numpy.pi)
- return (numpy.pi * 0.5 * r ** 4) / second_areal_moment(ga)
+ return (numpy.pi * 0.5 * r**4) / second_areal_moment(ga)
def nmi(collection):
@@ -460,8 +465,9 @@ def second_areal_moment(collection):
moment of area is actually the cross-moment of area between the X and Y
dimensions:
- I_xy = (1/24)\sum^{i=N}^{i=1} (x_iy_{i+1} + 2*x_iy_i + 2*x_{i+1}y_{i+1} +
- x_{i+1}y_i)(x_iy_i - x_{i+1}y_i)
+ .. math::
+        I_{xy} = (1/24)\\sum_{i=1}^{N} (x_iy_{i+1} + 2*x_iy_i + 2*x_{i+1}y_{i+1} +
+            x_{i+1}y_i)(x_iy_i - x_{i+1}y_i)
where x_i, y_i is the current point and x_{i+1}, y_{i+1} is the next point,
and where x_{n+1} = x_1, y_{n+1} = 1.
@@ -483,7 +489,6 @@ def second_areal_moment(collection):
for hole_ix in range(n_holes):
hole = pygeos.get_coordinates(pygeos.get_interior_ring(ga, hole_ix))
result[i] -= _second_moa_ring(hole)
- n_parts = pygeos.get_num_geometries(geometry)
for part in pygeos.get_parts(geometry):
result[i] += _second_moa_ring(pygeos.get_coordinates(part))
# must divide everything by 24 and flip if polygon is clockwise.
@@ -500,12 +505,11 @@ def _second_moa_ring(points):
for i in prange(len(points[:-1])):
x_tail, y_tail = points[i]
x_head, y_head = points[i + 1]
- moi += (x_tail * y_head - x_head * y_tail) * (
- x_tail * y_head
- + 2 * x_tail * y_tail
- + 2 * x_head * y_head
- + x_head * y_tail
- )
+ xtyh = x_tail * y_head
+ xhyt = x_head * y_tail
+ xtyt = x_tail * y_tail
+ xhyh = x_head * y_head
+ moi += (xtyh - xhyt) * (xtyh + 2 * xtyt + 2 * xhyh + xhyt)
return moi
diff --git a/esda/silhouettes.py b/esda/silhouettes.py
index 2aa45f50..d732c6e9 100644
--- a/esda/silhouettes.py
+++ b/esda/silhouettes.py
@@ -1,26 +1,28 @@
+import warnings
+
import numpy as np
-from scipy.sparse import csgraph as cg
from scipy import sparse as sp
+from scipy.sparse import csgraph as cg
try:
+ import pandas as pd
import sklearn.metrics as sk
import sklearn.metrics.pairwise as skp
- from sklearn.preprocessing import LabelEncoder
- import pandas as pd
+ from sklearn.preprocessing import LabelEncoder # noqa F401
HAS_REQUIREMENTS = True
-except ImportError as e:
+except ImportError:
HAS_REQUIREMENTS = False
def _raise_initial_error():
missing = []
try:
- import sklearn
+ import sklearn # noqa F401
except ImportError:
missing.append("scikit-learn")
try:
- import pandas
+ import pandas # noqa F401
except ImportError:
missing.append("pandas")
raise ImportError(
@@ -50,7 +52,8 @@ def path_silhouette(
directed=False,
):
"""
- Compute a path silhouette for all observations :cite:`wolf2019geosilhouettes,Rousseeuw1987`.
+ Compute a path silhouette for all observations
+ :cite:`wolf2019geosilhouettes,Rousseeuw1987`.
Parameters
@@ -250,7 +253,8 @@ def path_silhouette(
def boundary_silhouette(data, labels, W, metric=skp.euclidean_distances):
"""
- Compute the observation-level boundary silhouette score :cite:`wolf2019geosilhouettes`.
+ Compute the observation-level boundary silhouette
+ score :cite:`wolf2019geosilhouettes`.
Parameters
----------
@@ -280,8 +284,8 @@ def boundary_silhouette(data, labels, W, metric=skp.euclidean_distances):
So, instead of considering *all* clusters when finding the next-best-fit cluster,
only clusters that `i` borders are considered.
This is supposed to model the fact that, in spatially-constrained clustering,
- observation i can only be reassigned from cluster c to cluster k if some observation
- j neighbors i and also resides in k.
+ observation i can only be reassigned from cluster c to cluster k if
+ some observation j neighbors i and also resides in k.
If an observation only neighbors its own cluster, i.e. is not on the boundary
of a cluster, this value is zero.
@@ -383,7 +387,10 @@ def silhouette_alist(data, labels, alist, indices=None, metric=skp.euclidean_dis
containing `focal` id, `neighbor` id, and `label_focal`, and `label_neighbor`,
this computes:
- `d(i,label_neighbor) - d(i,label_focal) / (max(d(i,label_neighbor), d(i,label_focal)))`
+ .. math::
+
+ \\frac{d(i, label_{neighbor}) - d(i, label_{focal})}
+ {\\max(d(i, label_{neighbor}), d(i, label_{focal}))}
Parameters
----------
@@ -465,17 +472,17 @@ def silhouette_alist(data, labels, alist, indices=None, metric=skp.euclidean_dis
neighbor_mask = np.nonzero(neighbor_mask.values)[0]
if len(neighbor_mask) == 0:
sils.append(0)
- warn(
- "A link ({},{}) has been found to have an empty set of neighbors. "
- " This may happen when a label assignment is missing for the neighbor unit."
- " Check that no labels are missing.".format(row.focal, row.neighbor)
+ warnings.warn(
+ f"A link ({row.focal},{row.neighbor}) has been found to have "
+ "an empty set of neighbors. This may happen when a label "
+ "assignment is missing for the neighbor unit. "
+ "Check that no labels are missing."
)
continue
outer_distance = full_distances[i_Xc, neighbor_mask].mean()
- sils.append(
- (outer_distance - within_cluster)
- / np.maximum(outer_distance, within_cluster)
- )
+ dist_diff = outer_distance - within_cluster
+ dist_max = np.maximum(outer_distance, within_cluster)
+ sils.append(dist_diff / dist_max)
result["silhouette"] = sils
return result.sort_values("focal").reset_index(drop=True)
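The dist_diff/dist_max pair above is the usual silhouette ratio; a minimal sketch of the arithmetic with assumed distances (values invented for illustration):

import numpy as np

within_cluster = 0.8  # mean distance from i to its own cluster
outer_distance = 1.2  # mean distance from i to the neighboring cluster
sil = (outer_distance - within_cluster) / np.maximum(outer_distance, within_cluster)
print(round(float(sil), 4))  # 0.3333; positive means i fits its own cluster better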
@@ -492,22 +499,22 @@ def nearest_label(
Parameters
----------
data : (N,P) array to cluster on or DataFrame indexed on the same values as
- that in alist.focal/alist.neighbor
- labels: (N,) array containing classifications, indexed on the same values
- as that in alist.focal/alist.neighbor
- metric : callable, array,
- a function that takes an argument (data) and returns the all-pairs
- distances/dissimilarity between observations.
+ that in alist.focal/alist.neighbor
+ labels : (N,) array containing classifications, indexed on the same values
+ as that in alist.focal/alist.neighbor
+ metric : callable, array,
+ a function that takes an argument (data) and returns the all-pairs
+ distances/dissimilarity between observations.
return_distance: bool
- Whether to return the distance from the observation to its nearest
- cluster in feature space. If True, the tuple of (nearest_label, dissim)
- is returned. If False, only the nearest_label array is returned.
+ Whether to return the distance from the observation to its nearest
+ cluster in feature space. If True, the tuple of (nearest_label, dissim)
+ is returned. If False, only the nearest_label array is returned.
keep_self: bool
- whether to allow observations to use their current cluster as their
- nearest label. If True, an observation's existing cluster assignment can
- also be the cluster it is closest to. If False, an observation's existing
- cluster assignment cannot be the cluster it is closest to. This would mean
- the function computes the nearest *alternative* cluster.
+ whether to allow observations to use their current cluster as their
+ nearest label. If True, an observation's existing cluster assignment can
+ also be the cluster it is closest to. If False, an observation's existing
+ cluster assignment cannot be the cluster it is closest to. This would mean
+ the function computes the nearest *alternative* cluster.
Returns
-------
@@ -539,7 +546,6 @@ def nearest_label(
nearest_label_dissim = np.empty(labels.shape)
for label in unique_labels:
this_label_mask = labels == label
- n_in_label = this_label_mask.sum()
this_label_mask = np.nonzero(this_label_mask)[0]
next_best_fit = np.ones(this_label_mask.shape) * np.inf
next_best_label = np.empty(this_label_mask.shape, dtype=labels.dtype)
diff --git a/esda/smaup.py b/esda/smaup.py
index 127b4ed7..ba3f1647 100644
--- a/esda/smaup.py
+++ b/esda/smaup.py
@@ -13,7 +13,8 @@
class Smaup(object):
- """S-maup: Statistical Test to Measure the Sensitivity to the Modifiable Areal Unit Problem
+ """S-maup: Statistical Test to Measure the Sensitivity
+ to the Modifiable Areal Unit Problem
Parameters
----------
diff --git a/esda/smoothing.py b/esda/smoothing.py
index 2f312e53..599fb1cc 100644
--- a/esda/smoothing.py
+++ b/esda/smoothing.py
@@ -1,4 +1,5 @@
from __future__ import division
+
"""
Apply smoothing to rate computation
@@ -12,24 +13,59 @@
"""
-__author__ = "Myunghwa Hwang , David Folch , Luc Anselin , Serge Rey , "
+ "David Folch , "
+ "Luc Anselin , "
+ "Serge Rey = cumsum_threshold).nonzero()[0][0]
if reordered_w[median_inx] == cumsum_threshold and len(d) - 1 > median_inx:
- return np.sort(d)[median_inx:median_inx + 2].mean()
+ return np.sort(d)[median_inx : median_inx + 2].mean() # noqa E203
return np.sort(d)[median_inx]
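The tie branch above averages the two central sorted values when the cumulative weight lands exactly on half the total. A self-contained sketch of the same logic (an illustrative reimplementation under that reading, not the library function):

import numpy as np

def weighted_median_sketch(d, w):
    # sort values, carry weights along, and find where the cumulative
    # weight first reaches half of the total weight
    order = np.argsort(d)
    d, w = np.asarray(d)[order], np.asarray(w)[order]
    cum_w = w.cumsum()
    cumsum_threshold = cum_w[-1] / 2.0
    median_inx = (cum_w >= cumsum_threshold).nonzero()[0][0]
    if cum_w[median_inx] == cumsum_threshold and len(d) - 1 > median_inx:
        # exact tie: average the two central sorted values
        return d[median_inx : median_inx + 2].mean()
    return d[median_inx]

print(weighted_median_sketch([1, 2, 3, 4], [1, 1, 1, 1]))  # 2.5 (tie case)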
@@ -155,9 +191,9 @@ def sum_by_n(d, w, n):
"""
t = len(d)
- h = t // n #must be floor!
+ h = t // n # must be floor!
d = d * w
- return np.array([sum(d[i: i + h]) for i in range(0, t, h)])
+ return np.array([sum(d[i : i + h]) for i in range(0, t, h)]) # noqa E203
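An illustrative call of the blockwise sum above (toy numbers; assumes n divides t evenly, as the floor-division comment requires):

import numpy as np

d = np.array([1, 2, 3, 4, 5, 6])  # t = 6 observations
w = np.ones(6)  # unit weights
t, n = len(d), 3  # n = 3 spatial units -> h = 2 values per unit
h = t // n
print(np.array([sum((d * w)[i : i + h]) for i in range(0, t, h)]))  # [ 3.  7. 11.]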
def crude_age_standardization(e, b, n):
@@ -165,12 +201,13 @@ def crude_age_standardization(e, b, n):
Parameters
----------
- e : array
- (n*h, 1), event variable measured for each age group across n spatial units
- b : array
- (n*h, 1), population at risk variable measured for each age group across n spatial units
- n : integer
- the number of spatial units
+ e : array
+ (n*h, 1), event variable measured for each age group across n spatial units
+ b : array
+ (n*h, 1), population at risk variable measured for each age
+ group across n spatial units
+ n : integer
+ the number of spatial units
Notes
-----
@@ -218,16 +255,17 @@ def direct_age_standardization(e, b, s, n, alpha=0.05):
Parameters
----------
- e : array
- (n*h, 1), event variable measured for each age group across n spatial units
- b : array
- (n*h, 1), population at risk variable measured for each age group across n spatial units
- s : array
- (n*h, 1), standard population for each age group across n spatial units
- n : integer
- the number of spatial units
- alpha : float
- significance level for confidence interval
+ e : array
+ (n*h, 1), event variable measured for each age group across n spatial units
+ b : array
+ (n*h, 1), population at risk variable measured for each
+ age group across n spatial units
+ s : array
+ (n*h, 1), standard population for each age group across n spatial units
+ n : integer
+ the number of spatial units
+ alpha : float
+ significance level for confidence interval
Notes
-----
@@ -256,10 +294,11 @@ def direct_age_standardization(e, b, s, n, alpha=0.05):
>>> b = np.array([1000, 1000, 1100, 900, 1000, 900, 1100, 900])
For direct age standardization, we also need the data for standard population.
- Standard population is a reference population-at-risk (e.g., population distribution for the U.S.)
- whose age distribution can be used as a benchmarking point for comparing age distributions
- across regions (e.g., population distribution for Arizona and California).
- Another array including standard population is created.
+ Standard population is a reference population-at-risk (e.g., population
+ distribution for the U.S.) whose age distribution can be used as a benchmarking
+ point for comparing age distributions across regions (e.g., population
+ distribution for Arizona and California). Another array including standard
+ population is created.
>>> s = np.array([1000, 900, 1000, 900, 1000, 900, 1000, 900])
@@ -281,11 +320,11 @@ def direct_age_standardization(e, b, s, n, alpha=0.05):
var_estimate = sum_by_n(e, np.square(age_weight), n)
g_a = np.square(adjusted_r) / var_estimate
g_b = var_estimate / adjusted_r
- k = [age_weight[i:i + len(b) // n].max() for i in range(0, len(b),
- len(b) // n)]
+ _b = len(b)
+ _rb = range(0, _b, _b // n)
+ k = [age_weight[i : i + _b // n].max() for i in _rb] # noqa E203
g_a_k = np.square(adjusted_r + k) / (var_estimate + np.square(k))
g_b_k = (var_estimate + np.square(k)) / (adjusted_r + k)
- summed_b = sum_by_n(b, 1.0, n)
res = []
for i in range(len(adjusted_r)):
if adjusted_r[i] == 0:
@@ -303,18 +342,21 @@ def indirect_age_standardization(e, b, s_e, s_b, n, alpha=0.05):
Parameters
----------
- e : array
- (n*h, 1), event variable measured for each age group across n spatial units
- b : array
- (n*h, 1), population at risk variable measured for each age group across n spatial units
- s_e : array
- (n*h, 1), event variable measured for each age group across n spatial units in a standard population
- s_b : array
- (n*h, 1), population variable measured for each age group across n spatial units in a standard population
- n : integer
- the number of spatial units
- alpha : float
- significance level for confidence interval
+ e : array
+ (n*h, 1), event variable measured for each age group across n spatial units
+ b : array
+ (n*h, 1), population at risk variable measured for each age
+ group across n spatial units
+ s_e : array
+ (n*h, 1), event variable measured for each age group across
+ n spatial units in a standard population
+ s_b : array
+ (n*h, 1), population variable measured for each age group across
+ n spatial units in a standard population
+ n : integer
+ the number of spatial units
+ alpha : float
+ significance level for confidence interval
Notes
-----
@@ -342,13 +384,14 @@ def indirect_age_standardization(e, b, s_e, s_b, n, alpha=0.05):
>>> b = np.array([100, 100, 110, 90, 100, 90, 110, 90])
- For indirect age standardization, we also need the data for standard population and event.
- Standard population is a reference population-at-risk (e.g., population distribution for the U.S.)
- whose age distribution can be used as a benchmarking point for comparing age distributions
- across regions (e.g., popoulation distribution for Arizona and California).
- When the same concept is applied to the event variable,
- we call it standard event (e.g., the number of cancer patients in the U.S.).
- Two additional arrays including standard population and event are created.
+ For indirect age standardization, we also need the data for standard population
+ and event. Standard population is a reference population-at-risk (e.g.,
+ population distribution for the U.S.) whose age distribution can be used as a
+ benchmarking point for comparing age distributions across regions (e.g.,
+ population distribution for Arizona and California). When the same concept is
+ applied to the event variable, we call it standard event (e.g., the number of
+ cancer patients in the U.S.). Two additional arrays including standard population
+ and event are created.
>>> s_e = np.array([100, 45, 120, 100, 50, 30, 200, 80])
>>> s_b = np.array([1000, 900, 1000, 900, 1000, 900, 1000, 900])
@@ -384,16 +427,19 @@ def standardized_mortality_ratio(e, b, s_e, s_b, n):
Parameters
----------
- e : array
- (n*h, 1), event variable measured for each age group across n spatial units
- b : array
- (n*h, 1), population at risk variable measured for each age group across n spatial units
- s_e : array
- (n*h, 1), event variable measured for each age group across n spatial units in a standard population
- s_b : array
- (n*h, 1), population variable measured for each age group across n spatial units in a standard population
- n : integer
- the number of spatial units
+ e : array
+ (n*h, 1), event variable measured for each age group across n spatial units
+ b : array
+ (n*h, 1), population at risk variable measured for each age
+ group across n spatial units
+ s_e : array
+ (n*h, 1), event variable measured for each age group across
+ n spatial units in a standard population
+ s_b : array
+ (n*h, 1), population variable measured for each age group
+ across n spatial units in a standard population
+ n : integer
+ the number of spatial units
Notes
-----
@@ -452,14 +498,14 @@ def choynowski(e, b, n, threshold=None):
Parameters
----------
- e : array(n*h, 1)
- event variable measured for each age group across n spatial units
- b : array(n*h, 1)
- population at risk variable measured for each age group across n spatial units
- n : integer
- the number of spatial units
- threshold : float
- Returns zero for any p-value greater than threshold
+ e : array(n*h, 1)
+ event variable measured for each age group across n spatial units
+ b : array(n*h, 1)
+ population at risk variable measured for each age group across n spatial units
+ n : integer
+ the number of spatial units
+ threshold : float
+ Returns zero for any p-value greater than threshold
Notes
-----
@@ -563,6 +609,7 @@ def assuncao_rate(e, b):
ebi_v = ebi_a + ebi_b / b
return (y - ebi_b) / np.sqrt(ebi_v)
+
class _Smoother(object):
"""
This is a helper class that implements things that all smoothers should do.
@@ -572,11 +619,12 @@ class _Smoother(object):
maybe headbanging triples), since they're literally only inits + one
attribute.
"""
+
def __init__(self):
pass
@classmethod
- def by_col(cls, df, e,b, inplace=False, **kwargs):
+ def by_col(cls, df, e, b, inplace=False, **kwargs):
"""
Compute smoothing by columns in a dataframe.
@@ -616,16 +664,19 @@ def by_col(cls, df, e,b, inplace=False, **kwargs):
try:
assert len(e) == len(b)
except AssertionError:
- raise ValueError('There is no one-to-one mapping between event'
- ' variable and population at risk variable!')
- for ei, bi in zip(e,b):
+ raise ValueError(
+ "There is no one-to-one mapping between event"
+ " variable and population at risk variable!"
+ )
+ for ei, bi in zip(e, b):
ename = ei
bname = bi
ei = df[ename]
bi = df[bname]
- outcol = '_'.join(('-'.join((ename, bname)), cls.__name__.lower()))
+ outcol = "_".join(("-".join((ename, bname)), cls.__name__.lower()))
df[outcol] = cls(ei, bi, **kwargs).r
+
class Excess_Risk(_Smoother):
"""Excess Risk
@@ -649,8 +700,8 @@ class Excess_Risk(_Smoother):
>>> import libpysal
>>> stl = libpysal.io.open(libpysal.examples.get_path('stl_hom.csv'), 'r')
- The 11th and 14th columns in stl_hom.csv includes the number of homocides and population.
- Creating two arrays from these columns.
+ The 11th and 14th columns in stl_hom.csv include the number
+ of homicides and population. Creating two arrays from these columns.
>>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13])
@@ -658,7 +709,8 @@ class Excess_Risk(_Smoother):
>>> er = Excess_Risk(stl_e, stl_b)
- Extracting the excess risk values through the property r of the Excess_Risk instance, er
+ Extracting the excess risk values through
+ the property r of the Excess_Risk instance, er
>>> er.r[:10]
array([[0.20665681],
@@ -673,9 +725,10 @@ class Excess_Risk(_Smoother):
[0.25821905]])
"""
+
def __init__(self, e, b):
- e = np.asarray(e).reshape(-1,1)
- b = np.asarray(b).reshape(-1,1)
+ e = np.asarray(e).reshape(-1, 1)
+ b = np.asarray(b).reshape(-1, 1)
r_mean = e.sum() * 1.0 / b.sum()
self.r = e * 1.0 / (b * r_mean)
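The two lines above express excess risk as the observed rate over the global mean rate; a toy sketch of that arithmetic (numbers invented):

import numpy as np

e = np.array([10.0, 20.0]).reshape(-1, 1)  # events per unit
b = np.array([1000.0, 1000.0]).reshape(-1, 1)  # population at risk per unit
r_mean = e.sum() * 1.0 / b.sum()  # global mean rate = 0.015
print(e * 1.0 / (b * r_mean))  # approx. [[0.667], [1.333]]: below/above average risk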
@@ -705,8 +758,8 @@ class Empirical_Bayes(_Smoother):
>>> stl_ex = libpysal.examples.load_example('stl')
>>> stl = libpysal.io.open(stl_ex.get_path('stl_hom.csv'), 'r')
- The 11th and 14th columns in stl_hom.csv includes the number of homocides and population.
- Creating two arrays from these columns.
+ The 11th and 14th columns in stl_hom.csv include
+ the number of homicides and population. Creating two arrays from these columns.
>>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13])
@@ -714,7 +767,8 @@ class Empirical_Bayes(_Smoother):
>>> eb = Empirical_Bayes(stl_e, stl_b)
- Extracting the risk values through the property r of the Empirical_Bayes instance, eb
+ Extracting the risk values through the property
+ r of the Empirical_Bayes instance, eb
>>> eb.r[:10]
array([[2.36718950e-05],
@@ -729,9 +783,10 @@ class Empirical_Bayes(_Smoother):
[3.02748380e-05]])
"""
+
def __init__(self, e, b):
- e = np.asarray(e).reshape(-1,1)
- b = np.asarray(b).reshape(-1,1)
+ e = np.asarray(e).reshape(-1, 1)
+ b = np.asarray(b).reshape(-1, 1)
e_sum, b_sum = e.sum() * 1.0, b.sum() * 1.0
r_mean = e_sum / b_sum
rate = e * 1.0 / b
@@ -742,6 +797,7 @@ def __init__(self, e, b):
weight = r_var / (r_var + r_mean / b)
self.r = weight * rate + (1.0 - weight) * r_mean
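The final two lines above shrink each raw rate toward the global mean in proportion to the population behind it; a compressed numeric sketch (toy stand-ins for rate, r_mean, r_var, and b):

import numpy as np

rate = np.array([[0.02], [0.005]])  # raw rates e / b
r_mean = 0.01  # global mean rate
r_var = 1e-5  # assumed rate-variance estimate
b = np.array([[500.0], [5000.0]])  # population at risk
weight = r_var / (r_var + r_mean / b)  # larger b -> weight nearer 1
print(weight * rate + (1.0 - weight) * r_mean)  # small unit pulled harder toward 0.01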
+
class _Spatial_Smoother(_Smoother):
"""
This is a helper class that implements things that all the things that
@@ -753,11 +809,12 @@ class _Spatial_Smoother(_Smoother):
maybe headbanging triples), since they're literally only inits + one
attribute.
"""
+
def __init__(self):
pass
@classmethod
- def by_col(cls, df, e,b, w=None, inplace=False, **kwargs):
+ def by_col(cls, df, e, b, w=None, inplace=False, **kwargs):
"""
Compute smoothing by columns in a dataframe.
@@ -804,9 +861,11 @@ def by_col(cls, df, e,b, w=None, inplace=False, **kwargs):
if isinstance(w, W):
found = True
if not found:
- raise Exception('Weights not provided and no weights attached to frame!'
- ' Please provide a weight or attach a weight to the'
- ' dataframe')
+ raise Exception(
+ "Weights not provided and no weights attached to frame!"
+ " Please provide a weight or attach a weight to the"
+ " dataframe"
+ )
if isinstance(w, W):
w = [w] * len(e)
if len(b) == 1 and len(e) > 1:
@@ -814,16 +873,19 @@ def by_col(cls, df, e,b, w=None, inplace=False, **kwargs):
try:
assert len(e) == len(b)
except AssertionError:
- raise ValueError('There is no one-to-one mapping between event'
- ' variable and population at risk variable!')
+ raise ValueError(
+ "There is no one-to-one mapping between event"
+ " variable and population at risk variable!"
+ )
for ei, bi, wi in zip(e, b, w):
ename = ei
bname = bi
ei = df[ename]
bi = df[bname]
- outcol = '_'.join(('-'.join((ename, bname)), cls.__name__.lower()))
+ outcol = "_".join(("-".join((ename, bname)), cls.__name__.lower()))
df[outcol] = cls(ei, bi, w=wi, **kwargs).r
+
class Spatial_Empirical_Bayes(_Spatial_Smoother):
"""Spatial Empirical Bayes Smoothing
@@ -850,8 +912,8 @@ class Spatial_Empirical_Bayes(_Spatial_Smoother):
>>> stl_ex = libpysal.examples.load_example('stl')
>>> stl = libpysal.io.open(stl_ex.get_path('stl_hom.csv'), 'r')
- The 11th and 14th columns in stl_hom.csv includes the number of homocides and population.
- Creating two arrays from these columns.
+ The 11th and 14th columns in stl_hom.csv include the number
+ of homicides and population. Creating two arrays from these columns.
>>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13])
@@ -860,11 +922,13 @@ class Spatial_Empirical_Bayes(_Spatial_Smoother):
>>> stl_w = libpysal.io.open(stl_ex.get_path('stl.gal'), 'r').read()
Ensuring that the elements in the spatial weights instance are ordered
- by the given sequential numbers from 1 to the number of observations in stl_hom.csv
+ by the given sequential numbers from 1 to the number of observations
+ in stl_hom.csv
>>> if not stl_w.id_order_set: stl_w.id_order = range(1,len(stl) + 1)
- Creating an instance of Spatial_Empirical_Bayes class using stl_e, stl_b, and stl_w
+ Creating an instance of Spatial_Empirical_Bayes class
+ using stl_e, stl_b, and stl_w
>>> from esda.smoothing import Spatial_Empirical_Bayes
>>> s_eb = Spatial_Empirical_Bayes(stl_e, stl_b, stl_w)
@@ -883,14 +947,18 @@ class Spatial_Empirical_Bayes(_Spatial_Smoother):
[3.73034109e-05],
[3.47270722e-05]])
"""
+
def __init__(self, e, b, w):
if not w.id_order_set:
- raise ValueError("w id_order must be set to align with the order of e an b")
- e = np.asarray(e).reshape(-1,1)
- b = np.asarray(b).reshape(-1,1)
+ raise ValueError(
+ "The `id_order` of `w` must be set to "
+ "align with the order of `e` and `b`"
+ )
+ e = np.asarray(e).reshape(-1, 1)
+ b = np.asarray(b).reshape(-1, 1)
r_mean = Spatial_Rate(e, b, w).r
rate = e * 1.0 / b
- r_var_left = np.ones_like(e) * 1.
+ r_var_left = np.ones_like(e) * 1.0
ngh_num = np.ones_like(e)
bi = slag(w, b) + b
for i, idv in enumerate(w.id_order):
@@ -905,6 +973,7 @@ def __init__(self, e, b, w):
r_var[r_var < 0] = 0.0
self.r = r_mean + (rate - r_mean) * (r_var / (r_var + (r_mean / b)))
+
class Spatial_Rate(_Spatial_Smoother):
"""Spatial Rate Smoothing
@@ -931,8 +1000,8 @@ class Spatial_Rate(_Spatial_Smoother):
>>> stl_ex = libpysal.examples.load_example('stl')
>>> stl = libpysal.io.open(stl_ex.get_path('stl_hom.csv'), 'r')
- The 11th and 14th columns in stl_hom.csv includes the number of homocides and population.
- Creating two arrays from these columns.
+ The 11th and 14th columns in stl_hom.csv include the
+ number of homicides and population. Creating two arrays from these columns.
>>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13])
@@ -941,7 +1010,8 @@ class Spatial_Rate(_Spatial_Smoother):
>>> stl_w = libpysal.io.open(stl_ex.get_path('stl.gal'), 'r').read()
Ensuring that the elements in the spatial weights instance are ordered
- by the given sequential numbers from 1 to the number of observations in stl_hom.csv
+ by the given sequential numbers from 1 to the number
+ of observations in stl_hom.csv
>>> if not stl_w.id_order_set: stl_w.id_order = range(1,len(stl) + 1)
@@ -964,16 +1034,19 @@ class Spatial_Rate(_Spatial_Smoother):
[4.26204928e-05],
[3.47270722e-05]])
"""
+
def __init__(self, e, b, w):
if not w.id_order_set:
- raise ValueError("w id_order must be set to align with the order of e and b")
+ raise ValueError(
+ "w id_order must be set to align with the order of e and b"
+ )
else:
- e = np.asarray(e).reshape(-1,1)
- b = np.asarray(b).reshape(-1,1)
- w.transform = 'b'
+ e = np.asarray(e).reshape(-1, 1)
+ b = np.asarray(b).reshape(-1, 1)
+ w.transform = "b"
w_e, w_b = slag(w, e), slag(w, b)
self.r = (e + w_e) / (b + w_b)
- w.transform = 'o'
+ w.transform = "o"
class Kernel_Smoother(_Spatial_Smoother):
@@ -1020,7 +1093,8 @@ class Kernel_Smoother(_Spatial_Smoother):
>>> kr = Kernel_Smoother(e, b, kw)
- Extracting the smoothed rates through the property r of the Kernel_Smoother instance
+ Extracting the smoothed rates through the
+ property r of the Kernel_Smoother instance
>>> kr.r
array([[0.10543301],
@@ -1030,14 +1104,17 @@ class Kernel_Smoother(_Spatial_Smoother):
[0.04756872],
[0.04845298]])
"""
+
def __init__(self, e, b, w):
if type(w) != Kernel:
- raise Error('w must be an instance of Kernel weights')
+ raise ValueError("w must be an instance of Kernel weights")
if not w.id_order_set:
- raise ValueError("w id_order must be set to align with the order of e and b")
+ raise ValueError(
+ "w id_order must be set to align with the order of e and b"
+ )
else:
- e = np.asarray(e).reshape(-1,1)
- b = np.asarray(b).reshape(-1,1)
+ e = np.asarray(e).reshape(-1, 1)
+ b = np.asarray(b).reshape(-1, 1)
w_e, w_b = slag(w, e), slag(w, b)
self.r = w_e / w_b
@@ -1047,22 +1124,23 @@ class Age_Adjusted_Smoother(_Spatial_Smoother):
Parameters
----------
- e : array (n*h, 1)
- event variable measured for each age group across n spatial units
- b : array (n*h, 1)
- population at risk variable measured for each age group across n spatial units
- w : spatial weights instance
- s : array (n*h, 1)
- standard population for each age group across n spatial units
+ e : array (n*h, 1)
+ event variable measured for each age group across n spatial units
+ b : array (n*h, 1)
+ population at risk variable measured for each age group across n spatial units
+ w : spatial weights instance
+ s : array (n*h, 1)
+ standard population for each age group across n spatial units
Attributes
----------
- r : array (n, 1)
- rate values from spatial rate smoothing
+ r : array (n, 1)
+ rate values from spatial rate smoothing
Notes
-----
- Weights used to smooth age-specific events and populations are simple binary weights
+ Weights used to smooth age-specific events
+ and populations are simple binary weights
Examples
--------
@@ -1097,34 +1175,36 @@ class Age_Adjusted_Smoother(_Spatial_Smoother):
>>> ar = Age_Adjusted_Smoother(e, b, kw, s)
- Extracting the smoothed rates through the property r of the Age_Adjusted_Smoother instance
+ Extracting the smoothed rates through the property r of
+ the Age_Adjusted_Smoother instance
>>> ar.r
array([0.10519625, 0.08494318, 0.06440072, 0.06898604, 0.06952076,
0.05020968])
"""
+
def __init__(self, e, b, w, s, alpha=0.05):
e = np.asarray(e).reshape(-1, 1)
b = np.asarray(b).reshape(-1, 1)
s = np.asarray(s).flatten()
t = len(e)
h = t // w.n
- w.transform = 'b'
+ w.transform = "b"
e_n, b_n = [], []
for i in range(h):
e_n.append(slag(w, e[i::h]).tolist())
b_n.append(slag(w, b[i::h]).tolist())
- e_n = np.array(e_n).reshape((1, t), order='F')[0]
- b_n = np.array(b_n).reshape((1, t), order='F')[0]
+ e_n = np.array(e_n).reshape((1, t), order="F")[0]
+ b_n = np.array(b_n).reshape((1, t), order="F")[0]
e_n = e_n.reshape(s.shape)
b_n = b_n.reshape(s.shape)
r = direct_age_standardization(e_n, b_n, s, w.n, alpha=alpha)
self.r = np.array([i[0] for i in r])
- w.transform = 'o'
+ w.transform = "o"
- @_requires('pandas')
+ @_requires("pandas")
@classmethod
- def by_col(cls, df, e,b, w=None, s=None, **kwargs):
+ def by_col(cls, df, e, b, w=None, s=None, **kwargs):
"""
Compute smoothing by columns in a dataframe.
@@ -1162,6 +1242,7 @@ def by_col(cls, df, e,b, w=None, s=None, **kwargs):
if s is None:
raise Exception('Standard population variable "s" must be supplied.')
import pandas as pd
+
if isinstance(e, str):
e = [e]
if isinstance(b, str):
@@ -1176,14 +1257,17 @@ def by_col(cls, df, e,b, w=None, s=None, **kwargs):
found = True
break
if not found:
- raise Exception('Weights not provided and no weights attached to frame!'
- ' Please provide a weight or attach a weight to the'
- ' dataframe.')
+ raise Exception(
+ "Weights not provided and no weights attached to frame!"
+ " Please provide a weight or attach a weight to the"
+ " dataframe."
+ )
if isinstance(w, W):
w = [w] * len(e)
if not all(isinstance(wi, W) for wi in w):
- raise Exception('Weights object must be an instance of '
- ' libpysal.weights.W!')
+ raise Exception(
+ "Weights object must be an instance of " " libpysal.weights.W!"
+ )
b = b * len(e) if len(b) == 1 and len(e) > 1 else b
s = s * len(e) if len(s) == 1 and len(e) > 1 else s
try:
@@ -1191,17 +1275,18 @@ def by_col(cls, df, e,b, w=None, s=None, **kwargs):
assert len(e) == len(s)
assert len(e) == len(w)
except AssertionError:
- raise ValueError('There is no one-to-one mapping between event'
- ' variable and population at risk variable, and '
- ' standard population variable, and spatial '
- ' weights!')
+ raise ValueError(
+ "There is no one-to-one mapping between the event"
+ " variable, the population at risk variable, the standard"
+ " population variable, and the spatial weights!"
+ )
rdf = []
max_len = 0
for ei, bi, wi, si in zip(e, b, w, s):
ename = ei
bname = bi
- h = len(ei) // wi.n
- outcol = '_'.join(('-'.join((ename, bname)), cls.__name__.lower()))
+ outcol = "_".join(("-".join((ename, bname)), cls.__name__.lower()))
this_r = cls(df[ei], df[bi], w=wi, s=df[si], **kwargs).r
max_len = 0 if len(this_r) > max_len else max_len
rdf.append((outcol, this_r.tolist()))
@@ -1238,8 +1323,8 @@ class Disk_Smoother(_Spatial_Smoother):
>>> stl_ex = libpysal.examples.load_example('stl')
>>> stl = libpysal.io.open(stl_ex.get_path('stl_hom.csv'), 'r')
- The 11th and 14th columns in stl_hom.csv includes the number of homocides and population.
- Creating two arrays from these columns.
+ The 11th and 14th columns in stl_hom.csv include the
+ number of homicides and population. Creating two arrays from these columns.
>>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13])
@@ -1248,7 +1333,8 @@ class Disk_Smoother(_Spatial_Smoother):
>>> stl_w = libpysal.io.open(stl_ex.get_path('stl.gal'), 'r').read()
Ensuring that the elements in the spatial weights instance are ordered
- by the given sequential numbers from 1 to the number of observations in stl_hom.csv
+ by the given sequential numbers from 1 to the number of
+ observations in stl_hom.csv
>>> if not stl_w.id_order_set: stl_w.id_order = range(1,len(stl) + 1)
@@ -1274,15 +1360,17 @@ class Disk_Smoother(_Spatial_Smoother):
def __init__(self, e, b, w):
if not w.id_order_set:
- raise ValueError("w id_order must be set to align with the order of e and b")
+ raise ValueError(
+ "w id_order must be set to align with the order of e and b"
+ )
else:
- e = np.asarray(e).reshape(-1,1)
- b = np.asarray(b).reshape(-1,1)
+ e = np.asarray(e).reshape(-1, 1)
+ b = np.asarray(b).reshape(-1, 1)
r = e * 1.0 / b
weight_sum = []
for i in w.id_order:
weight_sum.append(sum(w.weights[i]))
- self.r = slag(w, r) / np.array(weight_sum).reshape(-1,1)
+ self.r = slag(w, r) / np.array(weight_sum).reshape(-1, 1)
class Spatial_Median_Rate(_Spatial_Smoother):
@@ -1318,8 +1406,8 @@ class Spatial_Median_Rate(_Spatial_Smoother):
>>> stl_ex = libpysal.examples.load_example('stl')
>>> stl = libpysal.io.open(stl_ex.get_path('stl_hom.csv'), 'r')
- The 11th and 14th columns in stl_hom.csv includes the number of homocides and population.
- Creating two arrays from these columns.
+ The 11th and 14th columns in stl_hom.csv include the number of
+ homicides and population. Creating two arrays from these columns.
>>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13])
@@ -1328,7 +1416,8 @@ class Spatial_Median_Rate(_Spatial_Smoother):
>>> stl_w = libpysal.io.open(stl_ex.get_path('stl.gal'), 'r').read()
Ensuring that the elements in the spatial weights instance are ordered
- by the given sequential numbers from 1 to the number of observations in stl_hom.csv
+ by the given sequential numbers from 1 to the number of
+ observations in stl_hom.csv
>>> if not stl_w.id_order_set: stl_w.id_order = range(1,len(stl) + 1)
@@ -1336,7 +1425,8 @@ class Spatial_Median_Rate(_Spatial_Smoother):
>>> smr0 = Spatial_Median_Rate(stl_e,stl_b,stl_w)
- Extracting the computed rates through the property r of the Spatial_Median_Rate instance
+ Extracting the computed rates through the property
+ r of the Spatial_Median_Rate instance
>>> smr0.r[:10]
array([3.96047383e-05, 3.55386859e-05, 3.28308921e-05, 4.30731238e-05,
@@ -1347,7 +1437,8 @@ class Spatial_Median_Rate(_Spatial_Smoother):
>>> smr1 = Spatial_Median_Rate(stl_e,stl_b,stl_w,iteration=5)
- Extracting the computed rates through the property r of the Spatial_Median_Rate instance
+ Extracting the computed rates through the property r
+ of the Spatial_Median_Rate instance
>>> smr1.r[:10]
array([3.11293620e-05, 2.95956330e-05, 3.11293620e-05, 3.10159267e-05,
@@ -1359,7 +1450,8 @@ class Spatial_Median_Rate(_Spatial_Smoother):
>>> smr2 = Spatial_Median_Rate(stl_e,stl_b,stl_w,aw=stl_b)
- Extracting the computed rates through the property r of the Spatial_Median_Rate instance
+ Extracting the computed rates through the property
+ r of the Spatial_Median_Rate instance
>>> smr2.r[:10]
array([5.77412020e-05, 4.46449551e-05, 5.77412020e-05, 5.77412020e-05,
@@ -1371,7 +1463,8 @@ class Spatial_Median_Rate(_Spatial_Smoother):
>>> smr3 = Spatial_Median_Rate(stl_e,stl_b,stl_w,aw=stl_b,iteration=5)
- Extracting the computed rates through the property r of the Spatial_Median_Rate instance
+ Extracting the computed rates through the property
+ r of the Spatial_Median_Rate instance
>>> smr3.r[:10]
array([3.61363528e-05, 4.46449551e-05, 3.61363528e-05, 3.61363528e-05,
@@ -1379,9 +1472,12 @@ class Spatial_Median_Rate(_Spatial_Smoother):
3.61363528e-05, 4.46449551e-05])
>>>
"""
+
def __init__(self, e, b, w, aw=None, iteration=1):
if not w.id_order_set:
- raise ValueError("w id_order must be set to align with the order of e and b")
+ raise ValueError(
+ "w id_order must be set to align with the order of e and b"
+ )
e = np.asarray(e).flatten()
b = np.asarray(b).flatten()
self.r = e * 1.0 / b
@@ -1464,8 +1560,8 @@ class Spatial_Filtering(_Smoother):
>>> bbox = [[-92.700676, 36.881809], [-87.916573, 40.3295669]]
- The 11th and 14th columns in stl_hom.csv includes the number of homocides and population.
- Creating two arrays from these columns.
+ The 11th and 14th columns in stl_hom.csv include the number
+ of homicides and population. Creating two arrays from these columns.
>>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13])
@@ -1474,7 +1570,8 @@ class Spatial_Filtering(_Smoother):
>>> sf_0 = Spatial_Filtering(bbox,d,stl_e,stl_b,10,10,r=2)
- Extracting the resulting rates through the property r of the Spatial_Filtering instance
+ Extracting the resulting rates through the property
+ r of the Spatial_Filtering instance
>>> sf_0.r[:10]
array([4.23561763e-05, 4.45290850e-05, 4.56456221e-05, 4.49133384e-05,
@@ -1491,7 +1588,8 @@ class Spatial_Filtering(_Smoother):
>>> sf.r.shape
(100,)
- Extracting the resulting rates through the property r of the Spatial_Filtering instance
+ Extracting the resulting rates through the property
+ r of the Spatial_Filtering instance
>>> sf.r[:10]
array([3.73728738e-05, 4.04456300e-05, 4.04456300e-05, 3.81035327e-05,
@@ -1500,13 +1598,15 @@ class Spatial_Filtering(_Smoother):
"""
def __init__(self, bbox, data, e, b, x_grid, y_grid, r=None, pop=None):
- e= np.asarray(e).reshape(-1,1)
- b= np.asarray(b).reshape(-1,1)
+ e = np.asarray(e).reshape(-1, 1)
+ b = np.asarray(b).reshape(-1, 1)
data_tree = KDTree(data)
x_range = bbox[1][0] - bbox[0][0]
y_range = bbox[1][1] - bbox[0][1]
- x, y = np.mgrid[bbox[0][0]:bbox[1][0]:float(x_range) / x_grid,
- bbox[0][1]:bbox[1][1]:float(y_range) / y_grid]
+ x, y = np.mgrid[
+ bbox[0][0] : bbox[1][0] : float(x_range) / x_grid, # noqa E203
+ bbox[0][1] : bbox[1][1] : float(y_range) / y_grid, # noqa E203
+ ]
self.grid = list(zip(x.ravel(), y.ravel()))
self.r = []
if r is None and pop is None:
@@ -1528,9 +1628,9 @@ def __init__(self, bbox, data, e, b, x_grid, y_grid, r=None, pop=None):
self.r.append(e_n_f[-1] * 1.0 / b_n_f[-1])
self.r = np.array(self.r)
- @_requires('pandas')
+ @_requires("pandas")
@classmethod
- def by_col(cls, df, e, b, x_grid, y_grid, geom_col='geometry', **kwargs):
+ def by_col(cls, df, e, b, x_grid, y_grid, geom_col="geometry", **kwargs):
"""
Compute smoothing by columns in a dataframe. The bounding box and point
information is computed from the geometry column.
@@ -1562,6 +1662,7 @@ def by_col(cls, df, e, b, x_grid, y_grid, geom_col='geometry', **kwargs):
cells.
"""
import pandas as pd
+
# prep for application over multiple event/population pairs
if isinstance(e, str):
e = [e]
@@ -1580,16 +1681,18 @@ def by_col(cls, df, e, b, x_grid, y_grid, geom_col='geometry', **kwargs):
res = []
for ename, bname, xgi, ygi in zip(e, b, x_grid, y_grid):
r = cls(bbox, data, df[ename], df[bname], xgi, ygi, **kwargs)
- grid = np.asarray(r.grid).reshape(-1,2)
- name = '_'.join(('-'.join((ename, bname)), cls.__name__.lower()))
- colnames = ('_'.join((name, suffix)) for suffix in ['X', 'Y', 'R'])
- items = [(name, col) for name,col in zip(colnames, [grid[:,0],
- grid[:,1],
- r.r])]
+ grid = np.asarray(r.grid).reshape(-1, 2)
+ name = "_".join(("-".join((ename, bname)), cls.__name__.lower()))
+ colnames = ("_".join((name, suffix)) for suffix in ["X", "Y", "R"])
+ items = [
+ (name, col)
+ for name, col in zip(colnames, [grid[:, 0], grid[:, 1], r.r])
+ ]
res.append(pd.DataFrame.from_dict(dict(items)))
outdf = pd.concat(res)
return outdf
+
class Headbanging_Triples(object):
"""Generate a pseudo spatial weights instance that contains headbanging triples
@@ -1614,7 +1717,8 @@ class Headbanging_Triples(object):
key is observation record id, value is a list of the following:
tuple of original triple observations
distance between original triple observations
- distance between an original triple observation and its extrapolated point
+ distance between an original triple
+ observation and its extrapolated point
Examples
--------
@@ -1626,7 +1730,9 @@ class Headbanging_Triples(object):
Reading data in stl_hom.csv into stl_db to extract values
for event and population-at-risk variables
- >>> stl_db = libpysal.io.open(libpysal.examples.get_path('stl_hom.csv'),'r') # doctest: +SKIP
+ >>> stl_db = libpysal.io.open(
+ ... libpysal.examples.get_path('stl_hom.csv'),'r'
+ ... ) # doctest: +SKIP
Reading the stl data in the WKT format so that
we can easily extract polygon centroids
@@ -1665,7 +1771,9 @@ class Headbanging_Triples(object):
Opening sids2.shp file
>>> import libpysal
- >>> sids = libpysal.io.open(libpysal.examples.get_path('sids2.shp'),'r') # doctest: +SKIP
+ >>> sids = libpysal.io.open(
+ ... libpysal.examples.get_path('sids2.shp'),'r'
+ ... ) # doctest: +SKIP
Extracting the centroids of polygons in the sids data
@@ -1720,17 +1828,20 @@ class Headbanging_Triples(object):
>>> round(extrapolated[1],5), round(extrapolated[2],6) # doctest: +SKIP
(0.33753, 0.302707)
"""
+
def __init__(self, data, w, k=5, t=3, angle=135.0, edgecor=False):
- raise DeprecationWarning('Deprecated')
+ raise DeprecationWarning("Deprecated")
if k < 3:
- raise ValueError("w should be NeareastNeighbors instance & the number of neighbors should be more than 3.")
+ raise ValueError(
+ "`w` should be a `NeareastNeighbors` instance & the number "
+ "of neighbors should be more than 3."
+ )
if not w.id_order_set:
raise ValueError("w id_order must be set to align with the order of data")
self.triples, points = {}, {}
for i, pnt in enumerate(data):
ng = w.neighbor_offsets[i]
- points[(i, Point(pnt))] = dict(list(zip(ng, [Point(d)
- for d in data[ng]])))
+ points[(i, Point(pnt))] = dict(list(zip(ng, [Point(d) for d in data[ng]])))
for i, pnt in list(points.keys()):
ng = points[(i, pnt)]
tr, tr_dis = {}, []
@@ -1742,8 +1853,8 @@ def __init__(self, data, w, k=5, t=3, angle=135.0, edgecor=False):
if len(tr) > t:
for c in list(tr.keys()):
p2, p3 = tr[c]
- tr_dis.append((get_segment_point_dist(
- LineSegment(p2, p3), pnt), c))
+ _seg = LineSegment(p2, p3)
+ tr_dis.append((get_segment_point_dist(_seg, pnt), c))
tr_dis = sorted(tr_dis)[:t]
self.triples[i] = [trp for dis, trp in tr_dis]
else:
@@ -1768,15 +1879,27 @@ def __init__(self, data, w, k=5, t=3, angle=135.0, edgecor=False):
dist_p_p2 = get_points_dist(point, p2)
dist_p_p3 = get_points_dist(point, p3)
if dist_p_p2 <= dist_p_p3:
- ray1, ray2, s_pnt, dist, c = Ray(p2, point), Ray(p2, p3), p2, dist_p_p2, (ps[p2], ps[p3])
+ ray1, ray2, s_pnt, dist, c = (
+ Ray(p2, point),
+ Ray(p2, p3),
+ p2,
+ dist_p_p2,
+ (ps[p2], ps[p3]),
+ )
else:
- ray1, ray2, s_pnt, dist, c = Ray(p3, point), Ray(p3, p2), p3, dist_p_p3, (ps[p3], ps[p2])
+ ray1, ray2, s_pnt, dist, c = (
+ Ray(p3, point),
+ Ray(p3, p2),
+ p3,
+ dist_p_p3,
+ (ps[p3], ps[p2]),
+ )
ang = get_angle_between(ray1, ray2)
- if ang >= 90 + angle / 2 or (ang < 0 and ang + 360 >= 90 + angle / 2):
- ex_point = get_point_at_angle_and_dist(
- ray1, angle, dist)
- extra = [c, dist_p2_p3, get_points_dist(
- s_pnt, ex_point)]
+ if ang >= 90 + angle / 2 or (
+ ang < 0 and ang + 360 >= 90 + angle / 2
+ ):
+ ex_point = get_point_at_angle_and_dist(ray1, angle, dist)
+ extra = [c, dist_p2_p3, get_points_dist(s_pnt, ex_point)]
break
self.triples[ps[point]].append(extra[0])
self.extra[ps[point]] = extra
@@ -1805,109 +1928,110 @@ class Headbanging_Median_Rate(object):
Examples
--------
- >>> import libpysal # doctest: +SKIP
-
+ >>> import libpysal # doctest: +SKIP
opening the sids2 shapefile
- >>> sids = libpysal.io.open(libpysal.examples.get_path('sids2.shp'), 'r') # doctest: +SKIP
-
+ >>> sids = libpysal.io.open(
+ ... libpysal.examples.get_path('sids2.shp'), 'r'
+ ... ) # doctest: +SKIP
extracting the centroids of polygons in the sids2 data
- >>> sids_d = np.array([i.centroid for i in sids]) # doctest: +SKIP
-
+ >>> sids_d = np.array([i.centroid for i in sids]) # doctest: +SKIP
creating a 5-nearest neighbors weights from the centroids
- >>> sids_w = libpysal.weights.KNN(sids_d,k=5) # doctest: +SKIP
-
+ >>> sids_w = libpysal.weights.KNN(sids_d,k=5) # doctest: +SKIP
ensuring that the members in sids_w are ordered
- >>> if not sids_w.id_order_set: sids_w.id_order = sids_w.id_order # doctest: +SKIP
-
+ >>> if not sids_w.id_order_set: sids_w.id_order = sids_w.id_order # doctest: +SKIP
finding headbanging triples by using 5 neighbors
- >>> s_ht = Headbanging_Triples(sids_d,sids_w,k=5) # doctest: +SKIP
+ >>> s_ht = Headbanging_Triples(sids_d,sids_w,k=5) # doctest: +SKIP
DeprecationWarning: Deprecated
-
reading in the sids2 data table
- >>> sids_db = libpysal.io.open(libpysal.examples.get_path('sids2.dbf'), 'r') # doctest: +SKIP
-
+ >>> sids_db = libpysal.io.open(
+ ... libpysal.examples.get_path('sids2.dbf'), 'r'
+ ... ) # doctest: +SKIP
extracting the 10th and 9th columns in the sids2.dbf and
using data values as event and population-at-risk variables
- >>> s_e, s_b = np.array(sids_db[:,9]), np.array(sids_db[:,8]) # doctest: +SKIP
-
+ >>> s_e, s_b = np.array(sids_db[:,9]), np.array(sids_db[:,8]) # doctest: +SKIP
computing headbanging median rates from s_e, s_b, and s_ht
- >>> sids_hb_r = Headbanging_Median_Rate(s_e,s_b,s_ht) # doctest: +SKIP
-
+ >>> sids_hb_r = Headbanging_Median_Rate(s_e,s_b,s_ht) # doctest: +SKIP
- extracting the computed rates through the property r of the Headbanging_Median_Rate instance
+ extracting the computed rates through the property r of the
+ Headbanging_Median_Rate instance
- >>> sids_hb_r.r[:5] # doctest: +SKIP
+ >>> sids_hb_r.r[:5] # doctest: +SKIP
array([ 0.00075586, 0. , 0.0008285 , 0.0018315 , 0.00498891])
recomputing headbanging median rates with 5 iterations
- >>> sids_hb_r2 = Headbanging_Median_Rate(s_e,s_b,s_ht,iteration=5) # doctest: +SKIP
+ >>> sids_hb_r2 = Headbanging_Median_Rate(
+ ... s_e,s_b,s_ht,iteration=5
+ ... ) # doctest: +SKIP
+ extracting the computed rates through the property r of the
+ Headbanging_Median_Rate instance
- extracting the computed rates through the property r of the Headbanging_Median_Rate instance
-
- >>> sids_hb_r2.r[:5] # doctest: +SKIP
+ >>> sids_hb_r2.r[:5] # doctest: +SKIP
array([ 0.0008285 , 0.00084331, 0.00086896, 0.0018315 , 0.00498891])
recomputing headbanging median rates by considering a set of auxiliary weights
- >>> sids_hb_r3 = Headbanging_Median_Rate(s_e,s_b,s_ht,aw=s_b) # doctest: +SKIP
-
+ >>> sids_hb_r3 = Headbanging_Median_Rate(s_e,s_b,s_ht,aw=s_b) # doctest: +SKIP
- extracting the computed rates through the property r of the Headbanging_Median_Rate instance
+ extracting the computed rates through the property r of the
+ Headbanging_Median_Rate instance
- >>> sids_hb_r3.r[:5] # doctest: +SKIP
+ >>> sids_hb_r3.r[:5] # doctest: +SKIP
array([ 0.00091659, 0. , 0.00156838, 0.0018315 , 0.00498891])
"""
+
def __init__(self, e, b, t, aw=None, iteration=1):
- raise DeprecationWarning('Deprecated')
+ raise DeprecationWarning("Deprecated")
self.r = e * 1.0 / b
self.tr, self.aw = t.triples, aw
- if hasattr(t, 'extra'):
+ if hasattr(t, "extra"):
self.extra = t.extra
while iteration:
self.__search_headbanging_median()
iteration -= 1
def __get_screens(self, id, triples, weighted=False):
- r, tr = self.r, self.tr
+ r = self.r
if len(triples) == 0:
return r[id]
- if hasattr(self, 'extra') and id in self.extra:
+ if hasattr(self, "extra") and id in self.extra:
extra = self.extra
trp_r = r[list(triples[0])]
# observed rate
# plus difference in rate scaled by ratio of extrapolated distance
# & observed distance.
trp_r[-1] = trp_r[0] + (trp_r[0] - trp_r[-1]) * (
- extra[id][-1] * 1.0 / extra[id][1])
+ extra[id][-1] * 1.0 / extra[id][1]
+ )
trp_r = sorted(trp_r)
if not weighted:
return r[id], trp_r[0], trp_r[-1]
else:
trp_aw = self.aw[triples[0]]
- extra_w = trp_aw[0] + (trp_aw[0] - trp_aw[-
- 1]) * (extra[id][-1] * 1.0 / extra[id][1])
+ extra_w = trp_aw[0] + (trp_aw[0] - trp_aw[-1]) * (
+ extra[id][-1] * 1.0 / extra[id][1]
+ )
return r[id], trp_r[0], trp_r[-1], self.aw[id], trp_aw[0] + extra_w
if not weighted:
lowest, highest = [], []
@@ -1921,19 +2045,23 @@ def __get_screens(self, id, triples, weighted=False):
lowest_aw, highest_aw = [], []
for trp in triples:
trp_r = r[list(trp)]
- dtype = [('r', '%s' % trp_r.dtype), ('w',
- '%s' % self.aw.dtype)]
+ dtype = [("r", "%s" % trp_r.dtype), ("w", "%s" % self.aw.dtype)]
trp_r = np.array(list(zip(trp_r, list(trp))), dtype=dtype)
- trp_r.sort(order='r')
- lowest.append(trp_r['r'][0])
- highest.append(trp_r['r'][-1])
- lowest_aw.append(self.aw[int(round(trp_r['w'][0]))])
- highest_aw.append(self.aw[int(round(trp_r['w'][-1]))])
+ trp_r.sort(order="r")
+ lowest.append(trp_r["r"][0])
+ highest.append(trp_r["r"][-1])
+ lowest_aw.append(self.aw[int(round(trp_r["w"][0]))])
+ highest_aw.append(self.aw[int(round(trp_r["w"][-1]))])
wm_lowest = weighted_median(np.array(lowest), np.array(lowest_aw))
- wm_highest = weighted_median(
- np.array(highest), np.array(highest_aw))
+ wm_highest = weighted_median(np.array(highest), np.array(highest_aw))
triple_members = flatten(triples, unique=False)
- return r[id], wm_lowest, wm_highest, self.aw[id] * len(triples), self.aw[triple_members].sum()
+ return (
+ r[id],
+ wm_lowest,
+ wm_highest,
+ self.aw[id] * len(triples),
+ self.aw[triple_members].sum(),
+ )
def __get_median_from_screens(self, screens):
if isinstance(screens, float):
@@ -1952,17 +2080,16 @@ def __get_median_from_screens(self, screens):
return rk
def __search_headbanging_median(self):
- r, tr = self.r, self.tr
+ tr = self.tr
new_r = []
for k in list(tr.keys()):
- screens = self.__get_screens(
- k, tr[k], weighted=(self.aw is not None))
+ screens = self.__get_screens(k, tr[k], weighted=(self.aw is not None))
new_r.append(self.__get_median_from_screens(screens))
self.r = np.array(new_r)
- @_requires('pandas')
+ @_requires("pandas")
@classmethod
- def by_col(cls, df, e, b, t=None, geom_col='geometry', inplace=False, **kwargs):
+ def by_col(cls, df, e, b, t=None, geom_col="geometry", inplace=False, **kwargs):
"""
Compute smoothing by columns in a dataframe. The bounding box and point
information is computed from the geometry column.
@@ -1996,12 +2123,12 @@ def by_col(cls, df, e, b, t=None, geom_col='geometry', inplace=False, **kwargs):
event/population pairs. If done inplace, there is no return value and
`df` is modified in place.
"""
- import pandas as pd
+
if not inplace:
new = df.copy()
cls.by_col(new, e, b, t=t, geom_col=geom_col, inplace=True, **kwargs)
return new
- import pandas as pd
+
# prep for application over multiple event/population pairs
if isinstance(e, str):
e = [e]
@@ -2012,10 +2139,10 @@ def by_col(cls, df, e, b, t=None, geom_col='geometry', inplace=False, **kwargs):
data = get_points_array(df[geom_col])
- #Headbanging_Triples doesn't take **kwargs, so filter its arguments
+ # Headbanging_Triples doesn't take **kwargs, so filter its arguments
# (self, data, w, k=5, t=3, angle=135.0, edgecor=False):
- w = kwargs.pop('w', None)
+ w = kwargs.pop("w", None)
if w is None:
found = False
for k in df._metadata:
@@ -2023,20 +2150,20 @@ def by_col(cls, df, e, b, t=None, geom_col='geometry', inplace=False, **kwargs):
if isinstance(w, W):
found = True
if not found:
- raise Exception('Weights not provided and no weights attached to frame!'
- ' Please provide a weight or attach a weight to the'
- ' dataframe')
+ raise Exception(
+ "Weights not provided and no weights attached to frame!"
+ " Please provide a weight or attach a weight to the"
+ " dataframe"
+ )
- k = kwargs.pop('k', 5)
- t = kwargs.pop('t', 3)
- angle = kwargs.pop('angle', 135.0)
- edgecor = kwargs.pop('edgecor', False)
+ k = kwargs.pop("k", 5)
+ t = kwargs.pop("t", 3)
+ angle = kwargs.pop("angle", 135.0)
+ edgecor = kwargs.pop("edgecor", False)
- hbt = Headbanging_Triples(data, w, k=k, t=t, angle=angle,
- edgecor=edgecor)
+ hbt = Headbanging_Triples(data, w, k=k, t=t, angle=angle, edgecor=edgecor)
- res = []
for ename, bname in zip(e, b):
r = cls(df[ename], df[bname], hbt, **kwargs).r
- name = '_'.join(('-'.join((ename, bname)), cls.__name__.lower()))
+ name = "_".join(("-".join((ename, bname)), cls.__name__.lower()))
df[name] = r
diff --git a/esda/tabular.py b/esda/tabular.py
index 3ba7231c..2dfdffa2 100644
--- a/esda/tabular.py
+++ b/esda/tabular.py
@@ -1,8 +1,10 @@
# from ...common import requires as _requires
import itertools as _it
+
from libpysal.weights import W
+
# I would like to define it like this, so that you could make a call like:
# Geary(df, 'HOVAL', 'INC', w=W), but this only works in Python3. So, I have to
# use a workaround
@@ -51,7 +53,7 @@ def _univariate_handler(
**kwargs : optional keyword arguments
options that are passed directly to the statistic
"""
- ### Preprocess
+ # Preprocess
if not inplace:
new_df = df.copy()
_univariate_handler(
@@ -77,7 +79,7 @@ def _univariate_handler(
" Please provide a weight or attach a weight to the"
" dataframe"
)
- ### Prep indexes
+ # Prep indexes
if outvals is None:
outvals = []
outvals.insert(0, "_statistic")
@@ -100,13 +102,13 @@ def _univariate_handler(
if isinstance(cols, str):
cols = [cols]
- ### Make closure around weights & apply columnwise
+ # Make closure around weights & apply columnwise
def column_stat(column):
return stat(column.values, w=w, **kwargs)
stat_objs = df[cols].apply(column_stat)
- ### Assign into dataframe
+ # Assign into dataframe
for col in cols:
stat_obj = stat_objs[col]
y = kwargs.get("y")
diff --git a/esda/tests/test_adbscan.py b/esda/tests/test_adbscan.py
index b2dafebb..8440012e 100644
--- a/esda/tests/test_adbscan.py
+++ b/esda/tests/test_adbscan.py
@@ -1,10 +1,11 @@
import unittest
-import pandas
-import numpy as np
-from .. import adbscan
+import numpy as np
+import pandas
import pytest
+from .. import adbscan
+
class ADBSCAN_Tester(unittest.TestCase):
def setUp(self):
@@ -271,7 +272,13 @@ def test_get_cluster_boundary(self):
# # Single Core #
# ------------------------#
polys = adbscan.get_cluster_boundary(self.labels, self.db, xy=["x", "y"])
- wkt = "POLYGON ((0.7217553174317995 0.8192869956700687, 0.7605307121989587 0.9086488808086682, 0.9177741225129434 0.8568503024577332, 0.8126209616521135 0.6262871483113925, 0.6125260668293881 0.5475861559192435, 0.5425443680112613 0.7546476915298572, 0.7217553174317995 0.8192869956700687))"
+ wkt = (
+ "POLYGON ((0.7217553174317995 0.8192869956700687, 0.7605307121989587 "
+ "0.9086488808086682, 0.9177741225129434 0.8568503024577332, "
+ "0.8126209616521135 0.6262871483113925, 0.6125260668293881 "
+ "0.5475861559192435, 0.5425443680112613 0.7546476915298572, "
+ "0.7217553174317995 0.8192869956700687))"
+ )
self.assertEqual(polys[0].wkt, wkt)
# ------------------------#
@@ -280,7 +287,13 @@ def test_get_cluster_boundary(self):
polys = adbscan.get_cluster_boundary(
self.labels, self.db, xy=["x", "y"], n_jobs=-1
)
- wkt = "POLYGON ((0.7217553174317995 0.8192869956700687, 0.7605307121989587 0.9086488808086682, 0.9177741225129434 0.8568503024577332, 0.8126209616521135 0.6262871483113925, 0.6125260668293881 0.5475861559192435, 0.5425443680112613 0.7546476915298572, 0.7217553174317995 0.8192869956700687))"
+ wkt = (
+ "POLYGON ((0.7217553174317995 0.8192869956700687, 0.7605307121989587 "
+ "0.9086488808086682, 0.9177741225129434 0.8568503024577332, "
+ "0.8126209616521135 0.6262871483113925, 0.6125260668293881 "
+ "0.5475861559192435, 0.5425443680112613 0.7546476915298572, "
+ "0.7217553174317995 0.8192869956700687))"
+ )
self.assertEqual(polys[0].wkt, wkt)
diff --git a/esda/tests/test_gamma.py b/esda/tests/test_gamma.py
index 1e84091b..5d9870ce 100644
--- a/esda/tests/test_gamma.py
+++ b/esda/tests/test_gamma.py
@@ -1,12 +1,17 @@
import unittest
+
import numpy as np
+from libpysal.common import pandas
from libpysal.weights.util import lat2W
+
from ..gamma import Gamma
-from libpysal.common import pandas
PANDAS_EXTINCT = pandas is None
+
+
class Gamma_Tester(unittest.TestCase):
"""Unit test for Gamma Index"""
+
def setUp(self):
self.w = lat2W(4, 4)
self.y = np.ones(16)
@@ -24,7 +29,7 @@ def test_Gamma(self):
self.assertAlmostEqual(g.max_g, 20.0)
self.assertAlmostEqual(g.mean_g, 11.093093093093094)
np.random.seed(12345)
- g1 = Gamma(self.y, self.w, operation='s')
+ g1 = Gamma(self.y, self.w, operation="s")
self.assertAlmostEqual(g1.g, 8.0)
self.assertAlmostEqual(g1.g_z, -3.7057554345954791)
self.assertAlmostEqual(g1.p_sim_g, 0.001)
@@ -32,7 +37,7 @@ def test_Gamma(self):
self.assertAlmostEqual(g1.max_g, 48.0)
self.assertAlmostEqual(g1.mean_g, 25.623623623623622)
np.random.seed(12345)
- g2 = Gamma(self.y, self.w, operation='a')
+ g2 = Gamma(self.y, self.w, operation="a")
self.assertAlmostEqual(g2.g, 8.0)
self.assertAlmostEqual(g2.g_z, -3.7057554345954791)
self.assertAlmostEqual(g2.p_sim_g, 0.001)
@@ -58,22 +63,23 @@ def func(z, i, j):
self.assertAlmostEqual(g4.g_z, 3.1879280354548638)
self.assertAlmostEqual(g4.p_sim_g, 0.0030000000000000001)
- @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas')
+ @unittest.skipIf(PANDAS_EXTINCT, "missing pandas")
def test_by_col(self):
import pandas as pd
- df = pd.DataFrame(self.y, columns=['y'])
- r1 = Gamma.by_col(df, ['y'], w=self.w)
- self.assertIn('y_gamma', r1.columns)
- self.assertIn('y_p_sim', r1.columns)
+
+ df = pd.DataFrame(self.y, columns=["y"])
+ r1 = Gamma.by_col(df, ["y"], w=self.w)
+ self.assertIn("y_gamma", r1.columns)
+ self.assertIn("y_p_sim", r1.columns)
this_gamma = np.unique(r1.y_gamma.values)
this_pval = np.unique(r1.y_p_sim.values)
np.testing.assert_allclose(this_gamma, self.g.g)
np.testing.assert_allclose(this_pval, self.g.p_sim)
- Gamma.by_col(df, ['y'], inplace=True, operation='s', w=self.w)
+ Gamma.by_col(df, ["y"], inplace=True, operation="s", w=self.w)
this_gamma = np.unique(df.y_gamma.values)
this_pval = np.unique(df.y_p_sim.values)
np.testing.assert_allclose(this_gamma, 8.0)
- np.testing.assert_allclose(this_pval, .001)
+ np.testing.assert_allclose(this_pval, 0.001)
suite = unittest.TestSuite()
@@ -82,6 +88,6 @@ def test_by_col(self):
a = unittest.TestLoader().loadTestsFromTestCase(i)
suite.addTest(a)
-if __name__ == '__main__':
+if __name__ == "__main__":
runner = unittest.TextTestRunner()
runner.run(suite)
diff --git a/esda/tests/test_geary.py b/esda/tests/test_geary.py
index c421f081..18c467b2 100644
--- a/esda/tests/test_geary.py
+++ b/esda/tests/test_geary.py
@@ -1,14 +1,13 @@
"""Geary Unittest."""
import unittest
-from libpysal.io import open as popen
+import numpy as np
from libpysal import examples
from libpysal.common import pandas
-from libpysal.weights import W
+from libpysal.io import open as popen
from libpysal.weights import Rook
from .. import geary
-import numpy as np
PANDAS_EXTINCT = pandas is None
diff --git a/esda/tests/test_getisord.py b/esda/tests/test_getisord.py
index 35ed3a42..35547215 100644
--- a/esda/tests/test_getisord.py
+++ b/esda/tests/test_getisord.py
@@ -1,9 +1,10 @@
import unittest
+
import numpy as np
+from libpysal.common import ATOL, RTOL, pandas
+from libpysal.weights.distance import DistanceBand
from .. import getisord
-from libpysal.weights.distance import DistanceBand
-from libpysal.common import pandas, RTOL, ATOL
POINTS = np.array([(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)])
W = DistanceBand(POINTS, threshold=15)
diff --git a/esda/tests/test_join_counts.py b/esda/tests/test_join_counts.py
index 4bdbbd51..b3fda32d 100644
--- a/esda/tests/test_join_counts.py
+++ b/esda/tests/test_join_counts.py
@@ -1,14 +1,17 @@
import unittest
+
import numpy as np
+from libpysal.common import pandas
+from libpysal.weights.util import lat2W
from ..join_counts import Join_Counts
-from libpysal.weights.util import lat2W
-from libpysal.common import pandas
PANDAS_EXTINCT = pandas is None
+
class Join_Counts_Tester(unittest.TestCase):
"""Unit test for Join Counts"""
+
def setUp(self):
self.w = lat2W(4, 4)
self.y = np.ones(16)
@@ -41,12 +44,15 @@ def test_Join_Counts(self):
self.assertAlmostEqual(0.001, jc.p_sim_autocorr_pos)
self.assertAlmostEqual(0.2653504320039377, jc.sim_autocorr_chi2)
- @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas')
+ @unittest.skipIf(PANDAS_EXTINCT, "missing pandas")
def test_by_col(self):
import pandas as pd
- df = pd.DataFrame(self.y, columns=['y'])
+
+ df = pd.DataFrame(self.y, columns=["y"])
np.random.seed(12345)
- r1 = Join_Counts.by_col(df, ['y'], w=self.w, permutations=999) # outvals = ['bb', 'bw', 'ww', 'p_sim_bw', 'p_sim_bb']
+ r1 = Join_Counts.by_col(
+ df, ["y"], w=self.w, permutations=999
+ ) # outvals = ['bb', 'bw', 'ww', 'p_sim_bw', 'p_sim_bb']
bb = np.unique(r1.y_bb.values)
bw = np.unique(r1.y_bw.values)
@@ -59,12 +65,13 @@ def test_by_col(self):
self.assertAlmostEqual(bb_p, c.p_sim_bb)
self.assertAlmostEqual(bw_p, c.p_sim_bw)
+
suite = unittest.TestSuite()
test_classes = [Join_Counts_Tester]
for i in test_classes:
a = unittest.TestLoader().loadTestsFromTestCase(i)
suite.addTest(a)
-if __name__ == '__main__':
+if __name__ == "__main__":
runner = unittest.TextTestRunner()
runner.run(suite)
diff --git a/esda/tests/test_ljc.py b/esda/tests/test_ljc.py
index 217c13e1..14235421 100644
--- a/esda/tests/test_ljc.py
+++ b/esda/tests/test_ljc.py
@@ -1,31 +1,32 @@
# based off: https://github.com/pysal/esda/blob/master/esda/tests/test_join_counts.py
import unittest
+
import numpy as np
-from libpysal.weights.util import lat2W
from libpysal.common import pandas
+from libpysal.weights.util import lat2W
from esda.join_counts_local import Join_Counts_Local
PANDAS_EXTINCT = pandas is None
+
class Join_Counts_Locals_Tester(unittest.TestCase):
"""Unit test for Local Join Counts (univariate)"""
+
def setUp(self):
self.w = lat2W(4, 4)
self.y = np.ones(16)
self.y[0:8] = 0
def test_Join_Counts_Locals(self):
- """Test method"""
- np.random.seed(12345)
- ljc = Join_Counts_Local(connectivity=self.w).fit(self.y)
- assert np.array_equal(ljc.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 3, 2, 2, 3, 3, 2])
-
-
+ """Test method"""
+ np.random.seed(12345)
+ ljc = Join_Counts_Local(connectivity=self.w).fit(self.y)
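+        # on the 4x4 rook lattice with the top half 0 and the bottom half 1,
+        # only the 1-valued cells can form 1-1 joins, so the first eight
+        # local counts are zero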
+ assert np.array_equal(ljc.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 3, 2, 2, 3, 3, 2])
+
+
suite = unittest.TestSuite()
-test_classes = [
- Join_Counts_Locals_Tester
-]
+test_classes = [Join_Counts_Locals_Tester]
for i in test_classes:
a = unittest.TestLoader().loadTestsFromTestCase(i)
suite.addTest(a)
diff --git a/esda/tests/test_ljc_bv.py b/esda/tests/test_ljc_bv.py
index 8e9af20a..4caa8f8d 100644
--- a/esda/tests/test_ljc_bv.py
+++ b/esda/tests/test_ljc_bv.py
@@ -1,34 +1,43 @@
# based off: https://github.com/pysal/esda/blob/master/tests/test_join_counts.py
import unittest
+
import numpy as np
-from libpysal.weights.util import lat2W
from libpysal.common import pandas
+from libpysal.weights.util import lat2W
from esda.join_counts_local_bv import Join_Counts_Local_BV
PANDAS_EXTINCT = pandas is None
+
class Local_Join_Counts_BV_Tester(unittest.TestCase):
"""Unit test for Local Join Counts (univariate)"""
+
def setUp(self):
self.w = lat2W(4, 4)
self.x = np.ones(16)
self.x[0:8] = 0
- self.z = [0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1]
+ self.z = [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1]
def test_Local_Join_Counts_BV(self):
- """Test method"""
- np.random.seed(12345)
- ljc_bv_case1 = Join_Counts_Local_BV(connectivity=self.w).fit(self.x, self.z, case="BJC")
- ljc_bv_case2 = Join_Counts_Local_BV(connectivity=self.w).fit(self.x, self.z, case="CLC")
- assert np.array_equal(ljc_bv_case1.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0])
- assert np.array_equal(ljc_bv_case2.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 2])
-
+ """Test method"""
+ np.random.seed(12345)
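+        # case="BJC" targets the no-colocation bivariate join count and
+        # case="CLC" the co-location cluster variant (cf. Anselin and Li, 2019)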
+ ljc_bv_case1 = Join_Counts_Local_BV(connectivity=self.w).fit(
+ self.x, self.z, case="BJC"
+ )
+ ljc_bv_case2 = Join_Counts_Local_BV(connectivity=self.w).fit(
+ self.x, self.z, case="CLC"
+ )
+ assert np.array_equal(
+ ljc_bv_case1.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0]
+ )
+ assert np.array_equal(
+ ljc_bv_case2.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 2]
+ )
+
suite = unittest.TestSuite()
-test_classes = [
- Local_Join_Counts_BV_Tester
-]
+test_classes = [Local_Join_Counts_BV_Tester]
for i in test_classes:
a = unittest.TestLoader().loadTestsFromTestCase(i)
suite.addTest(a)
diff --git a/esda/tests/test_ljc_mv.py b/esda/tests/test_ljc_mv.py
index 7a67b7a1..49095961 100644
--- a/esda/tests/test_ljc_mv.py
+++ b/esda/tests/test_ljc_mv.py
@@ -1,33 +1,36 @@
# based off: https://github.com/pysal/esda/blob/master/tests/test_join_counts.py
import unittest
+
import numpy as np
-from libpysal.weights.util import lat2W
from libpysal.common import pandas
+from libpysal.weights.util import lat2W
from esda.join_counts_local_mv import Join_Counts_Local_MV
PANDAS_EXTINCT = pandas is None
+
class Local_Join_Counts_MV_Tester(unittest.TestCase):
"""Unit test for Local Join Counts (univariate)"""
+
def setUp(self):
self.w = lat2W(4, 4)
self.x = np.ones(16)
self.x[0:8] = 0
- self.y = [0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1]
- self.z = [0,1,1,1,1,1,1,1,0,0,0,1,0,0,1,1]
-
+ self.y = [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1]
+ self.z = [0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1]
def test_Local_Join_Counts_MV(self):
- """Test method"""
- np.random.seed(12345)
- ljc_mv = Join_Counts_Local_MV(connectivity=self.w).fit([self.x, self.y, self.z])
- assert np.array_equal(ljc_mv.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 2])
-
+ """Test method"""
+ np.random.seed(12345)
+ ljc_mv = Join_Counts_Local_MV(connectivity=self.w).fit([self.x, self.y, self.z])
+ assert np.array_equal(
+ ljc_mv.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 2]
+ )
+
+
suite = unittest.TestSuite()
-test_classes = [
- Local_Join_Counts_MV_Tester
-]
+test_classes = [Local_Join_Counts_MV_Tester]
for i in test_classes:
a = unittest.TestLoader().loadTestsFromTestCase(i)
suite.addTest(a)
diff --git a/esda/tests/test_local_geary.py b/esda/tests/test_local_geary.py
index 42197f90..3cb97c10 100644
--- a/esda/tests/test_local_geary.py
+++ b/esda/tests/test_local_geary.py
@@ -1,27 +1,26 @@
import unittest
+
import libpysal
-from libpysal.common import pandas, RTOL, ATOL
-from esda.geary_local import Geary_Local
import numpy as np
-PANDAS_EXTINCT = pandas is None
+from esda.geary_local import Geary_Local
+
class Geary_Local_Tester(unittest.TestCase):
def setUp(self):
np.random.seed(10)
self.w = libpysal.io.open(libpysal.examples.get_path("stl.gal")).read()
f = libpysal.io.open(libpysal.examples.get_path("stl_hom.txt"))
- self.y = np.array(f.by_col['HR8893'])
+ self.y = np.array(f.by_col["HR8893"])
def test_local_geary(self):
lG = Geary_Local(connectivity=self.w).fit(self.y)
self.assertAlmostEqual(lG.localG[0], 0.696703432)
self.assertAlmostEqual(lG.p_sim[0], 0.19)
-
+
+
suite = unittest.TestSuite()
-test_classes = [
- Geary_Local_Tester
-]
+test_classes = [Geary_Local_Tester]
for i in test_classes:
a = unittest.TestLoader().loadTestsFromTestCase(i)
suite.addTest(a)
@@ -29,4 +28,3 @@ def test_local_geary(self):
if __name__ == "__main__":
runner = unittest.TextTestRunner()
runner.run(suite)
-
diff --git a/esda/tests/test_local_geary_mv.py b/esda/tests/test_local_geary_mv.py
index aec3dc00..21bb40a2 100644
--- a/esda/tests/test_local_geary_mv.py
+++ b/esda/tests/test_local_geary_mv.py
@@ -1,29 +1,28 @@
import unittest
+
import libpysal
-from libpysal.common import pandas, RTOL, ATOL
-from esda.geary_local_mv import Geary_Local_MV
import numpy as np
-PANDAS_EXTINCT = pandas is None
+from esda.geary_local_mv import Geary_Local_MV
+
class Geary_Local_MV_Tester(unittest.TestCase):
def setUp(self):
np.random.seed(100)
self.w = libpysal.io.open(libpysal.examples.get_path("stl.gal")).read()
f = libpysal.io.open(libpysal.examples.get_path("stl_hom.txt"))
- self.y1 = np.array(f.by_col['HR8893'])
- self.y2 = np.array(f.by_col['HC8488'])
+ self.y1 = np.array(f.by_col["HR8893"])
+ self.y2 = np.array(f.by_col["HC8488"])
def test_local_geary_mv(self):
lG_mv = Geary_Local_MV(connectivity=self.w).fit([self.y1, self.y2])
-        print(lG_mv.p_sim[0])
self.assertAlmostEqual(lG_mv.localG[0], 0.4096931479581422)
self.assertAlmostEqual(lG_mv.p_sim[0], 0.211)
-
+
+
suite = unittest.TestSuite()
-test_classes = [
- Geary_Local_MV_Tester
-]
+test_classes = [Geary_Local_MV_Tester]
for i in test_classes:
a = unittest.TestLoader().loadTestsFromTestCase(i)
suite.addTest(a)
diff --git a/esda/tests/test_losh.py b/esda/tests/test_losh.py
index 6cad83db..8ddc8606 100644
--- a/esda/tests/test_losh.py
+++ b/esda/tests/test_losh.py
@@ -1,28 +1,27 @@
# based off: https://github.com/pysal/esda/blob/master/tests/test_moran.py#L96
import unittest
+
import libpysal
-from libpysal.common import pandas, RTOL, ATOL
-from esda.losh import LOSH
import numpy as np
-PANDAS_EXTINCT = pandas is None
+from esda.losh import LOSH
+
class Losh_Tester(unittest.TestCase):
def setUp(self):
np.random.seed(10)
self.w = libpysal.io.open(libpysal.examples.get_path("stl.gal")).read()
f = libpysal.io.open(libpysal.examples.get_path("stl_hom.txt"))
- self.y = np.array(f.by_col['HR8893'])
+ self.y = np.array(f.by_col["HR8893"])
def test_losh(self):
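+        # LOSH is the local spatial heteroskedasticity statistic of
+        # Ord and Getis (2012); inference="chi-square" uses their
+        # analytical chi-square approximation instead of permutations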
ls = LOSH(connectivity=self.w, inference="chi-square").fit(self.y)
self.assertAlmostEqual(ls.Hi[0], 0.77613471)
self.assertAlmostEqual(ls.pval[0], 0.22802201)
-
+
+
suite = unittest.TestSuite()
-test_classes = [
- Losh_Tester
-]
+test_classes = [Losh_Tester]
for i in test_classes:
a = unittest.TestLoader().loadTestsFromTestCase(i)
suite.addTest(a)
@@ -30,4 +29,3 @@ def test_losh(self):
if __name__ == "__main__":
runner = unittest.TextTestRunner()
runner.run(suite)
-
diff --git a/esda/tests/test_map_comparison.py b/esda/tests/test_map_comparison.py
index afaeaa29..7964578e 100644
--- a/esda/tests/test_map_comparison.py
+++ b/esda/tests/test_map_comparison.py
@@ -1,4 +1,9 @@
-import numpy, pytest, geopandas, os
+import os
+
+import geopandas
+import numpy
+import pytest
+
from .. import map_comparison as mc
pygeos = pytest.importorskip("pygeos")
@@ -63,6 +68,10 @@ def test_external_entropy():
numpy.testing.assert_allclose(v1, v2)
numpy.testing.assert_allclose(v1, 0.3612313462)
v1_ctipped = mc.external_entropy(r1, r2, base=base, balance=100)
- numpy.testing.assert_allclose(v1_ctipped, 0.42275150844) #value from completeness
+ numpy.testing.assert_allclose(
+ v1_ctipped, 0.42275150844
+ ) # value from completeness
v1_htipped = mc.external_entropy(r1, r2, base=base, balance=-100)
- numpy.testing.assert_allclose(v1_htipped, 0.31534179219) #value from homogeneity
+ numpy.testing.assert_allclose(
+ v1_htipped, 0.31534179219
+ ) # value from homogeneity
diff --git a/esda/tests/test_mixture_smoothing.py b/esda/tests/test_mixture_smoothing.py
index e9806315..cd662342 100644
--- a/esda/tests/test_mixture_smoothing.py
+++ b/esda/tests/test_mixture_smoothing.py
@@ -1,11 +1,13 @@
import unittest
+
import numpy as np
-import libpysal
+
from .. import mixture_smoothing as m_s
class MS_Tester(unittest.TestCase):
"""Mixture_Smoothing Unit Tests"""
+
def setUp(self):
self.e = np.array([10, 5, 12, 20])
self.b = np.array([100, 150, 80, 200])
@@ -13,29 +15,28 @@ def setUp(self):
def test_NP_Mixture_Smoother(self):
"""Test the main class"""
mix = m_s.NP_Mixture_Smoother(self.e, self.b)
- np.testing.assert_array_almost_equal(mix.r, np.array(
- [0.10982278, 0.03445531, 0.11018404, 0.11018604]))
np.testing.assert_array_almost_equal(
- mix.category, np.array([1, 0, 1, 1]))
- #self.failUnless(mix.getSeed(), (np.array([ 0.5, 0.5]), np.array([ 0.03333333,
+ mix.r, np.array([0.10982278, 0.03445531, 0.11018404, 0.11018604])
+ )
+ np.testing.assert_array_almost_equal(mix.category, np.array([1, 0, 1, 1]))
+ # self.failUnless(mix.getSeed(), (np.array([ 0.5, 0.5]), np.array([ 0.03333333,
# 0.15 ])))
left, right = mix.getSeed()
np.testing.assert_array_almost_equal(left, np.array([0.5, 0.5]))
- np.testing.assert_array_almost_equal(
- right, np.array([0.03333333, 0.15]))
+ np.testing.assert_array_almost_equal(right, np.array([0.03333333, 0.15]))
d = mix.mixalg()
np.testing.assert_array_almost_equal(
- d['mix_den'], np.array([0., 0., 0., 0.]))
- np.testing.assert_array_almost_equal(d['gradient'], np.array([0.]))
- np.testing.assert_array_almost_equal(d['p'], np.array([1.]))
- np.testing.assert_array_almost_equal(
- d['grid'], np.array([11.27659574]))
- self.assertEqual(d['k'], 1)
- self.assertEqual(d['accuracy'], 1.0)
+ d["mix_den"], np.array([0.0, 0.0, 0.0, 0.0])
+ )
+ np.testing.assert_array_almost_equal(d["gradient"], np.array([0.0]))
+ np.testing.assert_array_almost_equal(d["p"], np.array([1.0]))
+ np.testing.assert_array_almost_equal(d["grid"], np.array([11.27659574]))
+ self.assertEqual(d["k"], 1)
+ self.assertEqual(d["accuracy"], 1.0)
left, right = mix.getRateEstimates()
np.testing.assert_array_almost_equal(
- left, np.array([0.0911574, 0.0911574,
- 0.0911574, 0.0911574]))
+ left, np.array([0.0911574, 0.0911574, 0.0911574, 0.0911574])
+ )
np.testing.assert_array_almost_equal(right, np.array([1, 1, 1, 1]))
diff --git a/esda/tests/test_moran.py b/esda/tests/test_moran.py
index 65319885..9c347539 100644
--- a/esda/tests/test_moran.py
+++ b/esda/tests/test_moran.py
@@ -1,11 +1,11 @@
import unittest
+
import libpysal
-from libpysal.common import pandas, RTOL, ATOL
-from .. import moran
import numpy as np
+from libpysal.common import ATOL, RTOL
+from .. import moran
-PANDAS_EXTINCT = pandas is None
SEED = 12345
@@ -131,7 +131,6 @@ def test_Moran_Local_parallel(self):
self.assertAlmostEqual(lm.z_sim[0], -0.6990291160835514)
self.assertAlmostEqual(lm.p_z_sim[0], 0.24226691753791396)
-
@unittest.skip("This function is being deprecated in the next release.")
def test_by_col(self):
import pandas as pd
@@ -158,28 +157,58 @@ def test_local_moments(self):
permutations=0,
seed=SEED,
)
-
+
wikh_fast = moran._wikh_fast(lm.w.sparse)
wikh_slow = moran._wikh_slow(lm.w.sparse)
- wikh_fast_c = moran._wikh_fast(lm.w.sparse, sokal_correction=True)
- wikh_slow_c = moran._wikh_slow(lm.w.sparse, sokal_correction=True)
+ # wikh_fast_c = moran._wikh_fast(lm.w.sparse, sokal_correction=True)
+ # wikh_slow_c = moran._wikh_slow(lm.w.sparse, sokal_correction=True)
np.testing.assert_allclose(wikh_fast, wikh_slow, rtol=RTOL, atol=ATOL)
np.testing.assert_allclose(wikh_fast, wikh_slow, rtol=RTOL, atol=ATOL)
- EIc = np.array([-0.00838113, -0.0243949 , -0.07031778,
- -0.21520869, -0.16547163, -0.00178435,
- -0.11531888, -0.36138555, -0.05471258, -0.09413562])
- VIc = np.array([0.03636013, 0.10412408, 0.28600769,
- 0.26389674, 0.21576683, 0.00779261,
- 0.44633942, 0.57696508, 0.12929777, 0.3730742 ])
-
- EI = -np.ones((10,))/9
- VI = np.array([0.47374172, 0.47356458, 0.47209663,
- 0.15866023, 0.15972526, 0.47376436,
- 0.46927721, 0.24584217, 0.26498308, 0.47077467])
-
-
+ EIc = np.array(
+ [
+ -0.00838113,
+ -0.0243949,
+ -0.07031778,
+ -0.21520869,
+ -0.16547163,
+ -0.00178435,
+ -0.11531888,
+ -0.36138555,
+ -0.05471258,
+ -0.09413562,
+ ]
+ )
+ VIc = np.array(
+ [
+ 0.03636013,
+ 0.10412408,
+ 0.28600769,
+ 0.26389674,
+ 0.21576683,
+ 0.00779261,
+ 0.44633942,
+ 0.57696508,
+ 0.12929777,
+ 0.3730742,
+ ]
+ )
+ EI = -np.ones((10,)) / 9
+ VI = np.array(
+ [
+ 0.47374172,
+ 0.47356458,
+ 0.47209663,
+ 0.15866023,
+ 0.15972526,
+ 0.47376436,
+ 0.46927721,
+ 0.24584217,
+ 0.26498308,
+ 0.47077467,
+ ]
+ )
np.testing.assert_allclose(lm.EIc, EIc, rtol=RTOL, atol=ATOL)
np.testing.assert_allclose(lm.VIc, VIc, rtol=RTOL, atol=ATOL)
diff --git a/esda/tests/test_shape.py b/esda/tests/test_shape.py
index eb3d5bbf..34feb00c 100644
--- a/esda/tests/test_shape.py
+++ b/esda/tests/test_shape.py
@@ -1,12 +1,12 @@
-from shapely import geometry
-from numpy import testing, array
import pytest
+from numpy import array, testing
+from shapely import geometry
+
+import esda
pygeos = pytest.importorskip("pygeos")
pytest.importorskip("numba")
-from esda.shape import *
-
shape = array(
[
@@ -31,94 +31,93 @@
def test_boundary_amplitude():
- observed = boundary_amplitude(shape)
+ observed = esda.shape.boundary_amplitude(shape)
testing.assert_allclose(observed, 0.844527, atol=ATOL)
def test_convex_hull_ratio():
- observed = convex_hull_ratio(shape)
+ observed = esda.shape.convex_hull_ratio(shape)
testing.assert_allclose(observed, 0.7, atol=ATOL)
def test_length_width_diff():
- observed = length_width_diff(shape)
+ observed = esda.shape.length_width_diff(shape)
testing.assert_allclose(observed, 0.25, atol=ATOL)
def test_radii_ratio():
- observed = radii_ratio(shape)
+ observed = esda.shape.radii_ratio(shape)
testing.assert_allclose(observed, 0.659366, atol=ATOL)
def test_diameter_ratio():
- observed = diameter_ratio(shape)
+ observed = esda.shape.diameter_ratio(shape)
testing.assert_allclose(observed, 0.8, atol=ATOL)
def test_iaq():
- observed = isoareal_quotient(shape)
+ observed = esda.shape.isoareal_quotient(shape)
testing.assert_allclose(observed, 0.622314, atol=ATOL)
def test_ipq():
- observed = isoperimetric_quotient(shape)
+ observed = esda.shape.isoperimetric_quotient(shape)
testing.assert_allclose(observed, 0.387275, atol=ATOL)
def test_moa():
- observed = moa_ratio(shape)
+ observed = esda.shape.moa_ratio(shape)
testing.assert_allclose(observed, 3.249799, atol=ATOL)
-def test_moment_of_interia():
+def test_moment_of_inertia():
- observed = moment_of_inertia(shape)
+ observed = esda.shape.moment_of_inertia(shape)
testing.assert_allclose(observed, 0.315715, atol=ATOL)
def test_nmi():
- observed = nmi(shape)
+ observed = esda.shape.nmi(shape)
testing.assert_allclose(observed, 0.487412, atol=ATOL)
def test_mbc():
- observed = minimum_bounding_circle_ratio(shape)
+ observed = esda.shape.minimum_bounding_circle_ratio(shape)
testing.assert_allclose(observed, 0.437571, atol=ATOL)
def test_reflexive_angle_ratio():
- observed = reflexive_angle_ratio(shape)
+ observed = esda.shape.reflexive_angle_ratio(shape)
testing.assert_allclose(observed, 3 / 8, atol=ATOL)
def test_fractal_dimension():
r = [
- fractal_dimension(shape, support=support)[0]
+ esda.shape.fractal_dimension(shape, support=support)[0]
for support in ("hex", "square", "circle")
]
-
testing.assert_allclose(r, [0.218144, -4.29504, 0.257882], atol=ATOL)
def test_squareness():
- observed = squareness(shape)
+ observed = esda.shape.squareness(shape)
testing.assert_allclose(observed, 0.493094, atol=ATOL)
def test_rectangularity():
- observed = rectangularity(shape)
+ observed = esda.shape.rectangularity(shape)
testing.assert_allclose(observed, 0.7, atol=ATOL)
def test_shape_index():
- observed = shape_index(shape)
+ observed = esda.shape.shape_index(shape)
testing.assert_allclose(observed, 0.659366, atol=ATOL)
def test_equivalent_rectangular_index():
- observed = equivalent_rectangular_index(shape)
+ observed = esda.shape.equivalent_rectangular_index(shape)
testing.assert_allclose(observed, 0.706581, atol=ATOL)
def test_form_factor():
- observed = form_factor(shape, array([2]))
+ observed = esda.shape.form_factor(shape, array([2]))
testing.assert_allclose(observed, 0.602535, atol=ATOL)
diff --git a/esda/tests/test_silhouette.py b/esda/tests/test_silhouette.py
index b0f9945f..56b13e77 100644
--- a/esda/tests/test_silhouette.py
+++ b/esda/tests/test_silhouette.py
@@ -1,124 +1,191 @@
-from .. import silhouettes
-import geopandas
+import unittest
+
import libpysal
import numpy
-import unittest
from sklearn.metrics import pairwise
+from .. import silhouettes
+
RTOL = libpysal.common.RTOL
ATOL = libpysal.common.ATOL
+
class Silhouette_Tester(unittest.TestCase):
def setUp(self):
- self.w = libpysal.weights.lat2W(3,3, rook=False)
+ self.w = libpysal.weights.lat2W(3, 3, rook=False)
numpy.random.seed(12345)
- self.X = numpy.random.random((9,3))
- self.groups = numpy.array([[0,0,0],
- [0,1,1],
- [0,1,1]]).flatten()
- self.precomputed = self.X @ self.X.T
+ self.X = numpy.random.random((9, 3))
+ self.groups = numpy.array([[0, 0, 0], [0, 1, 1], [0, 1, 1]]).flatten()
+ self.precomputed = self.X @ self.X.T
self.altmetric = pairwise.manhattan_distances
def test_boundary(self):
- known = numpy.array([ 0.03042804,
- -0.26071434,
- 0.26664155,
- -0.09395997,
- 0.02754655,
- -0.30581229,
- 0.2697089 ,
- -0.29993007,
- 0. ])
+ known = numpy.array(
+ [
+ 0.03042804,
+ -0.26071434,
+ 0.26664155,
+ -0.09395997,
+ 0.02754655,
+ -0.30581229,
+ 0.2697089,
+ -0.29993007,
+ 0.0,
+ ]
+ )
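+        # cells whose neighbors all share their own label are not boundary
+        # observations and receive a silhouette of exactly 0 (the last cell here)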
test = silhouettes.boundary_silhouette(self.X, self.groups, self.w)
numpy.testing.assert_allclose(known, test, rtol=RTOL, atol=ATOL)
- known = numpy.array([ 0.04246261,
- -0.17059996,
- 0.35656939,
- -0.10765454,
- 0.00831288,
- -0.30924291,
- 0.36502035,
- -0.25464692,
- 0. ])
- test = silhouettes.boundary_silhouette(self.X, self.groups, self.w,
- metric=self.altmetric)
+ known = numpy.array(
+ [
+ 0.04246261,
+ -0.17059996,
+ 0.35656939,
+ -0.10765454,
+ 0.00831288,
+ -0.30924291,
+ 0.36502035,
+ -0.25464692,
+ 0.0,
+ ]
+ )
+ test = silhouettes.boundary_silhouette(
+ self.X, self.groups, self.w, metric=self.altmetric
+ )
numpy.testing.assert_allclose(known, test, rtol=RTOL, atol=ATOL)
- known = numpy.array([-0.43402484,
- -0.29488836,
- -0.3663925 ,
- -0.32831205,
- -0.21536904,
- -0.10134236,
- -0.37144783,
- -0.01618446,
- 0. ])
- test = silhouettes.boundary_silhouette(self.X, self.groups, self.w,
- metric=self.precomputed)
+ known = numpy.array(
+ [
+ -0.43402484,
+ -0.29488836,
+ -0.3663925,
+ -0.32831205,
+ -0.21536904,
+ -0.10134236,
+ -0.37144783,
+ -0.01618446,
+ 0.0,
+ ]
+ )
+ test = silhouettes.boundary_silhouette(
+ self.X, self.groups, self.w, metric=self.precomputed
+ )
numpy.testing.assert_allclose(known, test, rtol=RTOL, atol=ATOL)
with self.assertRaises(AssertionError):
- silhouettes.boundary_silhouette(self.X, self.groups, self.w,
- metric=self.precomputed - self.precomputed.mean())
+ silhouettes.boundary_silhouette(
+ self.X,
+ self.groups,
+ self.w,
+ metric=self.precomputed - self.precomputed.mean(),
+ )
def test_path(self):
- known = numpy.array([ 0.15982274,
- -0.02136909,
- -0.3972349 ,
- 0.24479121,
- 0.02754655,
- 0.28465546,
- -0.07572727,
- 0.26903733,
- 0.4165144 ])
+ known = numpy.array(
+ [
+ 0.15982274,
+ -0.02136909,
+ -0.3972349,
+ 0.24479121,
+ 0.02754655,
+ 0.28465546,
+ -0.07572727,
+ 0.26903733,
+ 0.4165144,
+ ]
+ )
test = silhouettes.path_silhouette(self.X, self.groups, self.w)
numpy.testing.assert_allclose(known, test, rtol=RTOL, atol=ATOL)
- known = numpy.array([ 0.1520476 ,
- 0.0390323 ,
- -0.34269345,
- 0.27239358,
- 0.00831288,
- 0.27934432,
- -0.03874118,
- 0.28623703,
- 0.40062121])
- test = silhouettes.path_silhouette(self.X, self.groups, self.w,
- metric=self.altmetric)
+ known = numpy.array(
+ [
+ 0.1520476,
+ 0.0390323,
+ -0.34269345,
+ 0.27239358,
+ 0.00831288,
+ 0.27934432,
+ -0.03874118,
+ 0.28623703,
+ 0.40062121,
+ ]
+ )
+ test = silhouettes.path_silhouette(
+ self.X, self.groups, self.w, metric=self.altmetric
+ )
numpy.testing.assert_allclose(known, test, rtol=RTOL, atol=ATOL)
with self.assertRaises(TypeError):
- silhouettes.path_silhouette(self.X, self.groups, self.w,
- metric=self.precomputed)
+ silhouettes.path_silhouette(
+ self.X, self.groups, self.w, metric=self.precomputed
+ )
with self.assertRaises(AssertionError):
- silhouettes.path_silhouette(self.X, self.groups, self.w,
- metric=lambda d: -self.altmetric(d))
+ silhouettes.path_silhouette(
+ self.X, self.groups, self.w, metric=lambda d: -self.altmetric(d)
+ )
def test_nearest_label(self):
- known = numpy.array([1,1,1,1,0,0,1,0,0])
- test,d = silhouettes.nearest_label(self.X, self.groups, return_distance=True)
+ known = numpy.array([1, 1, 1, 1, 0, 0, 1, 0, 0])
+ test, d = silhouettes.nearest_label(self.X, self.groups, return_distance=True)
numpy.testing.assert_array_equal(known, test)
- known = numpy.array([0,1,0,0,1,0,0,0,1])
+ known = numpy.array([0, 1, 0, 0, 1, 0, 0, 0, 1])
test = silhouettes.nearest_label(self.X, self.groups, keep_self=True)
numpy.testing.assert_array_equal(test, known)
- knownd = numpy.array([1.05707684,
- 0.74780721,
- 0.88841079,
- 0.71628677,
- 1.25964181,
- 0.5854757 ,
- 0.89710073,
- 0.64575898,
- 0.73913526])
+ knownd = numpy.array(
+ [
+ 1.05707684,
+ 0.74780721,
+ 0.88841079,
+ 0.71628677,
+ 1.25964181,
+ 0.5854757,
+ 0.89710073,
+ 0.64575898,
+ 0.73913526,
+ ]
+ )
numpy.testing.assert_allclose(d, knownd, rtol=RTOL, atol=ATOL)
def test_silhouette_alist(self):
- known = numpy.array([0., 0., 0.22434244, 0.,
- 0., 0., -0.07589293, -0.07589293,
- 0., 0.41331324, 0.41331324, 0.,
- 0., 0.11703681, 0., 0.11703681,
- 0.27065991, 0.27065991, 0.27065991,
- 0.27065991, 0., 0.27065991,
- 0., 0., -0.07441639, -0.07441639,
- 0., 0., 0., 0., 0.41576712,
- 0.41576712, -0.06657343, 0., 0.,
- -0.06657343, 0., 0., 0., 0.])
+ known = numpy.array(
+ [
+ 0.0,
+ 0.0,
+ 0.22434244,
+ 0.0,
+ 0.0,
+ 0.0,
+ -0.07589293,
+ -0.07589293,
+ 0.0,
+ 0.41331324,
+ 0.41331324,
+ 0.0,
+ 0.0,
+ 0.11703681,
+ 0.0,
+ 0.11703681,
+ 0.27065991,
+ 0.27065991,
+ 0.27065991,
+ 0.27065991,
+ 0.0,
+ 0.27065991,
+ 0.0,
+ 0.0,
+ -0.07441639,
+ -0.07441639,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.41576712,
+ 0.41576712,
+ -0.06657343,
+ 0.0,
+ 0.0,
+ -0.06657343,
+ 0.0,
+ 0.0,
+ 0.0,
+ 0.0,
+ ]
+ )
test = silhouettes.silhouette_alist(self.X, self.groups, self.w.to_adjlist())
numpy.testing.assert_allclose(known, test.silhouette, rtol=RTOL, atol=ATOL)
diff --git a/esda/tests/test_smaup.py b/esda/tests/test_smaup.py
index 396397a7..01467fa1 100644
--- a/esda/tests/test_smaup.py
+++ b/esda/tests/test_smaup.py
@@ -1,63 +1,62 @@
import unittest
+
import libpysal
-from libpysal.common import pandas, RTOL, ATOL
-from ..smaup import Smaup
-from ..moran import Moran
import numpy as np
+from libpysal.common import ATOL, RTOL, pandas
+from ..moran import Moran
+from ..smaup import Smaup
PANDAS_EXTINCT = pandas is None
+
class Smaup_Tester(unittest.TestCase):
- def setup(self):
- from esda.moran import Moran
+ def setUp(self):
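+        # unittest only calls setUp (capital U) automatically before each test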
self.w = libpysal.io.open(libpysal.examples.get_path("stl.gal")).read()
f = libpysal.io.open(libpysal.examples.get_path("stl_hom.txt"))
- self.y = np.array(f.by_col['HR8893'])
+ self.y = np.array(f.by_col["HR8893"])
self.rho = Moran(self.y, self.w).I
self.n = len(self.y)
- self.k = int(self.n/2)
+ self.k = int(self.n / 2)
def test_smaup(self):
- self.setup()
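+        # S-maup measures how sensitive the variable is to the modifiable
+        # areal unit problem when n areas are aggregated into k regions,
+        # given Moran's I (rho)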
sm = Smaup(self.n, self.k, self.rho)
self.assertAlmostEqual(sm.n, 78)
self.assertAlmostEqual(sm.k, 39)
self.assertAlmostEqual(sm.rho, 0.24365582621771695)
- np.testing.assert_allclose(sm.smaup, 0.15221341690376405, rtol=RTOL, atol=ATOL)
+ np.testing.assert_allclose(sm.smaup, 0.15221341690376405, rtol=RTOL, atol=ATOL)
self.assertAlmostEqual(sm.critical_01, 0.38970613333333337)
self.assertAlmostEqual(sm.critical_05, 0.3557221333333333)
self.assertAlmostEqual(sm.critical_1, 0.3157950666666666)
- self.assertEqual(sm.summary, 'Pseudo p-value > 0.10 (H0 is not rejected)')
+ self.assertEqual(sm.summary, "Pseudo p-value > 0.10 (H0 is not rejected)")
def test_sids(self):
- from esda import moran
w = libpysal.io.open(libpysal.examples.get_path("sids2.gal")).read()
f = libpysal.io.open(libpysal.examples.get_path("sids2.dbf"))
SIDR = np.array(f.by_col("SIDR74"))
- rho = moran.Moran(SIDR, w, two_tailed=False).I
+ rho = Moran(SIDR, w, two_tailed=False).I
n = len(SIDR)
- k = int(n/2)
+ k = int(n / 2)
sm = Smaup(n, k, rho)
- np.testing.assert_allclose(sm.smaup, 0.15176796553181948, rtol=RTOL, atol=ATOL)
+ np.testing.assert_allclose(sm.smaup, 0.15176796553181948, rtol=RTOL, atol=ATOL)
self.assertAlmostEqual(sm.critical_01, 0.23404000000000003)
self.assertAlmostEqual(sm.critical_05, 0.21088)
self.assertAlmostEqual(sm.critical_1, 0.18239)
- self.assertEqual(sm.summary, 'Pseudo p-value > 0.10 (H0 is not rejected)')
+ self.assertEqual(sm.summary, "Pseudo p-value > 0.10 (H0 is not rejected)")
- @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas')
+ @unittest.skipIf(PANDAS_EXTINCT, "missing pandas")
def test_by_col(self):
from libpysal.io import geotable as pdio
- from esda import moran
+
np.random.seed(11213)
- df = pdio.read_files(libpysal.examples.get_path('sids2.dbf'))
+ df = pdio.read_files(libpysal.examples.get_path("sids2.dbf"))
w = libpysal.io.open(libpysal.examples.get_path("sids2.gal")).read()
- k = int(w.n/2)
- mi = moran.Moran.by_col(df, ['SIDR74'], w=w, two_tailed=False)
+ k = int(w.n / 2)
+ mi = Moran.by_col(df, ["SIDR74"], w=w, two_tailed=False)
rho = np.unique(mi.SIDR74_moran.values).item()
sm = Smaup(w.n, k, rho)
np.testing.assert_allclose(sm.smaup, 0.15176796553181948, atol=ATOL, rtol=RTOL)
self.assertAlmostEqual(sm.critical_01, 0.23404000000000003)
self.assertAlmostEqual(sm.critical_05, 0.21088)
self.assertAlmostEqual(sm.critical_1, 0.18239)
- self.assertEqual(sm.summary, 'Pseudo p-value > 0.10 (H0 is not rejected)')
+ self.assertEqual(sm.summary, "Pseudo p-value > 0.10 (H0 is not rejected)")
diff --git a/esda/tests/test_smoothing.py b/esda/tests/test_smoothing.py
index df8fdc28..6dbffd9c 100644
--- a/esda/tests/test_smoothing.py
+++ b/esda/tests/test_smoothing.py
@@ -1,12 +1,15 @@
import unittest
+
import libpysal
+import numpy as np
+from libpysal.common import ATOL, RTOL, pandas
from libpysal.weights.distance import KNN, Kernel
+
from .. import smoothing as sm
-import numpy as np
-from libpysal.common import RTOL, ATOL, pandas
PANDAS_EXTINCT = pandas is None
+
class TestFlatten(unittest.TestCase):
def setUp(self):
self.input = [[1, 2], [3, 3, 4], [5, 6]]
@@ -36,7 +39,7 @@ def setUp(self):
self.e = np.array([30, 25, 25, 15, 33, 21, 30, 20])
self.b = np.array([1000, 1000, 1100, 900, 1000, 900, 1100, 900])
self.s_e = np.array([100, 45, 120, 100, 50, 30, 200, 80])
- self.s_b = s = np.array([1000, 900, 1000, 900, 1000, 900, 1000, 900])
+ self.s_b = np.array([1000, 900, 1000, 900, 1000, 900, 1000, 900])
self.n = 2
def test_crude_age_standardization(self):
@@ -45,25 +48,28 @@ def test_crude_age_standardization(self):
self.assertEqual(list(crude), list(crude_exp))
def test_direct_age_standardization(self):
- direct = np.array(sm.direct_age_standardization(
- self.e, self.b, self.s_b, self.n)).round(8)
- direct_exp = np.array([[0.02374402, 0.01920491,
- 0.02904848], [0.02665072, 0.02177143, 0.03230508]])
+ direct = np.array(
+ sm.direct_age_standardization(self.e, self.b, self.s_b, self.n)
+ ).round(8)
+ direct_exp = np.array(
+ [[0.02374402, 0.01920491, 0.02904848], [0.02665072, 0.02177143, 0.03230508]]
+ )
self.assertEqual(list(direct.flatten()), list(direct_exp.flatten()))
def test_indirect_age_standardization(self):
- indirect = np.array(sm.indirect_age_standardization(
- self.e, self.b, self.s_e, self.s_b, self.n)).round(8)
- indirect_exp = np.array([[0.02372382, 0.01940230,
- 0.02900789], [0.02610803, .02154304, 0.03164035]])
- self.assertEqual(
- list(indirect.flatten()), list(indirect_exp.flatten()))
+ indirect = np.array(
+ sm.indirect_age_standardization(self.e, self.b, self.s_e, self.s_b, self.n)
+ ).round(8)
+ indirect_exp = np.array(
+ [[0.02372382, 0.01940230, 0.02900789], [0.02610803, 0.02154304, 0.03164035]]
+ )
+ self.assertEqual(list(indirect.flatten()), list(indirect_exp.flatten()))
class TestSRate(unittest.TestCase):
def setUp(self):
- sids = libpysal.io.open(libpysal.examples.get_path('sids2.dbf'), 'r')
- self.w = libpysal.io.open(libpysal.examples.get_path('sids2.gal'), 'r').read()
+ sids = libpysal.io.open(libpysal.examples.get_path("sids2.dbf"), "r")
+ self.w = libpysal.io.open(libpysal.examples.get_path("sids2.gal"), "r").read()
self.b, self.e = np.array(sids[:, 8]), np.array(sids[:, 9])
self.er = [0.453433, 0.000000, 0.775871, 0.973810, 3.133190]
self.eb = [0.0016973, 0.0017054, 0.0017731, 0.0020129, 0.0035349]
@@ -71,99 +77,101 @@ def setUp(self):
self.smr = [0.00083622, 0.00109402, 0.00081567, 0.0, 0.0048209]
self.smr_w = [0.00127146, 0.00127146, 0.0008433, 0.0, 0.0049889]
self.smr2 = [0.00091659, 0.00087641, 0.00091073, 0.0, 0.00467633]
- self.s_ebr10 = np.array([4.01485749e-05, 3.62437513e-05,
- 4.93034844e-05, 5.09387329e-05, 3.72735210e-05,
- 3.69333797e-05, 5.40245456e-05, 2.99806055e-05,
- 3.73034109e-05, 3.47270722e-05]).reshape(-1,1)
-
- stl_ex = libpysal.examples.load_example('stl')
- self.stl = libpysal.io.open(stl_ex.get_path('stl_hom.csv'), 'r')
+ self.s_ebr10 = np.array(
+ [
+ 4.01485749e-05,
+ 3.62437513e-05,
+ 4.93034844e-05,
+ 5.09387329e-05,
+ 3.72735210e-05,
+ 3.69333797e-05,
+ 5.40245456e-05,
+ 2.99806055e-05,
+ 3.73034109e-05,
+ 3.47270722e-05,
+ ]
+ ).reshape(-1, 1)
+
+ stl_ex = libpysal.examples.load_example("stl")
+ self.stl = libpysal.io.open(stl_ex.get_path("stl_hom.csv"), "r")
self.stl_e, self.stl_b = np.array(self.stl[:, 10]), np.array(self.stl[:, 13])
- self.stl_w = libpysal.io.open(stl_ex.get_path('stl.gal'), 'r').read()
+ self.stl_w = libpysal.io.open(stl_ex.get_path("stl.gal"), "r").read()
if not self.stl_w.id_order_set:
self.stl_w.id_order = list(range(1, len(self.stl) + 1))
if not PANDAS_EXTINCT:
- self.df = libpysal.io.open(libpysal.examples.get_path('sids2.dbf')).to_df()
- self.ename = 'SID74'
- self.bname = 'BIR74'
- self.stl_df = libpysal.io.open(libpysal.examples.get_path('stl_hom.csv')).to_df()
- self.stl_ename = 'HC7984'
- self.stl_bname = 'PO7984'
+ self.df = libpysal.io.open(libpysal.examples.get_path("sids2.dbf")).to_df()
+ self.ename = "SID74"
+ self.bname = "BIR74"
+ self.stl_df = libpysal.io.open(
+ libpysal.examples.get_path("stl_hom.csv")
+ ).to_df()
+ self.stl_ename = "HC7984"
+ self.stl_bname = "PO7984"
def test_Excess_Risk(self):
out_er = sm.Excess_Risk(self.e, self.b).r
- np.testing.assert_allclose(out_er[:5].flatten(), self.er,
- rtol=RTOL, atol=ATOL)
+ np.testing.assert_allclose(out_er[:5].flatten(), self.er, rtol=RTOL, atol=ATOL)
- @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas')
+ @unittest.skipIf(PANDAS_EXTINCT, "missing pandas")
def test_Excess_Risk_tabular(self):
out_er = sm.Excess_Risk(self.df[self.ename], self.df[self.bname]).r
- np.testing.assert_allclose(out_er[:5].flatten(), self.er,
- rtol=RTOL, atol=ATOL)
+ np.testing.assert_allclose(out_er[:5].flatten(), self.er, rtol=RTOL, atol=ATOL)
self.assertIsInstance(out_er, np.ndarray)
out_er = sm.Excess_Risk.by_col(self.df, self.ename, self.bname)
- outcol = '{}-{}_excess_risk'.format(self.ename, self.bname)
+ outcol = "{}-{}_excess_risk".format(self.ename, self.bname)
outcol = out_er[outcol]
- np.testing.assert_allclose(outcol[:5], self.er,
- rtol=RTOL, atol=ATOL)
+ np.testing.assert_allclose(outcol[:5], self.er, rtol=RTOL, atol=ATOL)
self.assertIsInstance(outcol.values, np.ndarray)
def test_Empirical_Bayes(self):
out_eb = sm.Empirical_Bayes(self.e, self.b).r
- np.testing.assert_allclose(out_eb[:5].flatten(), self.eb,
- rtol=RTOL, atol=ATOL)
+ np.testing.assert_allclose(out_eb[:5].flatten(), self.eb, rtol=RTOL, atol=ATOL)
- @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas')
+ @unittest.skipIf(PANDAS_EXTINCT, "missing pandas")
def test_Empirical_Bayes_tabular(self):
out_eb = sm.Empirical_Bayes(self.df[self.ename], self.df[self.bname]).r
- np.testing.assert_allclose(out_eb[:5].flatten(), self.eb,
- rtol=RTOL, atol=ATOL)
+ np.testing.assert_allclose(out_eb[:5].flatten(), self.eb, rtol=RTOL, atol=ATOL)
self.assertIsInstance(out_eb, np.ndarray)
out_eb = sm.Empirical_Bayes.by_col(self.df, self.ename, self.bname)
- outcol = '{}-{}_empirical_bayes'.format(self.ename, self.bname)
+ outcol = "{}-{}_empirical_bayes".format(self.ename, self.bname)
outcol = out_eb[outcol]
- np.testing.assert_allclose(outcol[:5], self.eb,
- rtol=RTOL, atol=ATOL)
+ np.testing.assert_allclose(outcol[:5], self.eb, rtol=RTOL, atol=ATOL)
self.assertIsInstance(outcol.values, np.ndarray)
def test_Spatial_Empirical_Bayes(self):
s_eb = sm.Spatial_Empirical_Bayes(self.stl_e, self.stl_b, self.stl_w)
- np.testing.assert_allclose(self.s_ebr10, s_eb.r[:10],
- rtol=RTOL, atol=ATOL)
+ np.testing.assert_allclose(self.s_ebr10, s_eb.r[:10], rtol=RTOL, atol=ATOL)
- @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas')
+ @unittest.skipIf(PANDAS_EXTINCT, "missing pandas")
def test_Spatial_Empirical_Bayes_tabular(self):
- s_eb = sm.Spatial_Empirical_Bayes(self.stl_df[self.stl_ename],
- self.stl_df[self.stl_bname],
- self.stl_w).r
+ s_eb = sm.Spatial_Empirical_Bayes(
+ self.stl_df[self.stl_ename], self.stl_df[self.stl_bname], self.stl_w
+ ).r
self.assertIsInstance(s_eb, np.ndarray)
np.testing.assert_allclose(self.s_ebr10, s_eb[:10])
- s_eb = sm.Spatial_Empirical_Bayes.by_col(self.stl_df,
- self.stl_ename,
- self.stl_bname,
- self.stl_w)
- outcol = '{}-{}_spatial_empirical_bayes'.format(self.stl_ename, self.stl_bname)
+ s_eb = sm.Spatial_Empirical_Bayes.by_col(
+ self.stl_df, self.stl_ename, self.stl_bname, self.stl_w
+ )
+ outcol = "{}-{}_spatial_empirical_bayes".format(self.stl_ename, self.stl_bname)
r = s_eb[outcol].values
self.assertIsInstance(r, np.ndarray)
- np.testing.assert_allclose(self.s_ebr10, r[:10].reshape(-1,1))
+ np.testing.assert_allclose(self.s_ebr10, r[:10].reshape(-1, 1))
def test_Spatial_Rate(self):
out_sr = sm.Spatial_Rate(self.e, self.b, self.w).r
- np.testing.assert_allclose(out_sr[:5].flatten(), self.sr,
- rtol=RTOL, atol=ATOL)
+ np.testing.assert_allclose(out_sr[:5].flatten(), self.sr, rtol=RTOL, atol=ATOL)
- @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas')
+ @unittest.skipIf(PANDAS_EXTINCT, "missing pandas")
def test_Spatial_Rate_tabular(self):
out_sr = sm.Spatial_Rate(self.df[self.ename], self.df[self.bname], self.w).r
- np.testing.assert_allclose(out_sr[:5].flatten(), self.sr,
- rtol=RTOL, atol=ATOL)
+ np.testing.assert_allclose(out_sr[:5].flatten(), self.sr, rtol=RTOL, atol=ATOL)
self.assertIsInstance(out_sr, np.ndarray)
- out_sr = sm.Spatial_Rate.by_col(self.df, self.ename, self.bname,w=self.w)
- outcol = '{}-{}_spatial_rate'.format(self.ename, self.bname)
+ out_sr = sm.Spatial_Rate.by_col(self.df, self.ename, self.bname, w=self.w)
+ outcol = "{}-{}_spatial_rate".format(self.ename, self.bname)
r = out_sr[outcol].values
self.assertIsInstance(r, np.ndarray)
np.testing.assert_allclose(r[:5], self.sr, rtol=RTOL, atol=ATOL)
@@ -172,129 +180,139 @@ def test_Spatial_Median_Rate(self):
out_smr = sm.Spatial_Median_Rate(self.e, self.b, self.w).r
out_smr_w = sm.Spatial_Median_Rate(self.e, self.b, self.w, aw=self.b).r
out_smr2 = sm.Spatial_Median_Rate(self.e, self.b, self.w, iteration=2).r
- np.testing.assert_allclose(out_smr[:5].flatten(), self.smr,
- atol=ATOL, rtol=RTOL)
- np.testing.assert_allclose(out_smr_w[:5].flatten(), self.smr_w,
- atol=ATOL, rtol=RTOL)
- np.testing.assert_allclose(out_smr2[:5].flatten(), self.smr2,
- atol=ATOL, rtol=RTOL)
-
- @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas')
+ np.testing.assert_allclose(
+ out_smr[:5].flatten(), self.smr, atol=ATOL, rtol=RTOL
+ )
+ np.testing.assert_allclose(
+ out_smr_w[:5].flatten(), self.smr_w, atol=ATOL, rtol=RTOL
+ )
+ np.testing.assert_allclose(
+ out_smr2[:5].flatten(), self.smr2, atol=ATOL, rtol=RTOL
+ )
+
+ @unittest.skipIf(PANDAS_EXTINCT, "missing pandas")
def test_Spatial_Median_Rate_tabular(self):
- out_smr = sm.Spatial_Median_Rate(self.df[self.ename],
- self.df[self.bname],
- self.w).r
- out_smr_w = sm.Spatial_Median_Rate(self.df[self.ename],
- self.df[self.bname],
- self.w,
- aw = self.df[self.bname]).r
- out_smr2 = sm.Spatial_Median_Rate(self.df[self.ename],
- self.df[self.bname],
- self.w,
- iteration=2).r
+ out_smr = sm.Spatial_Median_Rate(
+ self.df[self.ename], self.df[self.bname], self.w
+ ).r
+ out_smr_w = sm.Spatial_Median_Rate(
+ self.df[self.ename], self.df[self.bname], self.w, aw=self.df[self.bname]
+ ).r
+ out_smr2 = sm.Spatial_Median_Rate(
+ self.df[self.ename], self.df[self.bname], self.w, iteration=2
+ ).r
self.assertIsInstance(out_smr, np.ndarray)
self.assertIsInstance(out_smr_w, np.ndarray)
self.assertIsInstance(out_smr2, np.ndarray)
- np.testing.assert_allclose(out_smr[:5].flatten(), self.smr,
- atol=ATOL, rtol=RTOL)
- np.testing.assert_allclose(out_smr_w[:5].flatten(), self.smr_w,
- atol=ATOL, rtol=RTOL)
- np.testing.assert_allclose(out_smr2[:5].flatten(), self.smr2,
- atol=ATOL, rtol=RTOL)
-
- out_smr = sm.Spatial_Median_Rate.by_col(self.df, self.ename,
- self.bname, self.w)
- out_smr_w = sm.Spatial_Median_Rate.by_col(self.df, self.ename,
- self.bname, self.w,
- aw = self.df[self.bname])
- out_smr2 = sm.Spatial_Median_Rate.by_col(self.df, self.ename,
- self.bname, self.w,
- iteration=2)
- outcol = '{}-{}_spatial_median_rate'.format(self.ename, self.bname)
-
- np.testing.assert_allclose(out_smr[outcol].values[:5], self.smr,
- rtol=RTOL, atol=ATOL)
- np.testing.assert_allclose(out_smr_w[outcol].values[:5], self.smr_w,
- rtol=RTOL, atol=ATOL)
- np.testing.assert_allclose(out_smr2[outcol].values[:5], self.smr2,
- rtol=RTOL, atol=ATOL)
-
- @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas')
+ np.testing.assert_allclose(
+ out_smr[:5].flatten(), self.smr, atol=ATOL, rtol=RTOL
+ )
+ np.testing.assert_allclose(
+ out_smr_w[:5].flatten(), self.smr_w, atol=ATOL, rtol=RTOL
+ )
+ np.testing.assert_allclose(
+ out_smr2[:5].flatten(), self.smr2, atol=ATOL, rtol=RTOL
+ )
+
+ out_smr = sm.Spatial_Median_Rate.by_col(self.df, self.ename, self.bname, self.w)
+ out_smr_w = sm.Spatial_Median_Rate.by_col(
+ self.df, self.ename, self.bname, self.w, aw=self.df[self.bname]
+ )
+ out_smr2 = sm.Spatial_Median_Rate.by_col(
+ self.df, self.ename, self.bname, self.w, iteration=2
+ )
+ outcol = "{}-{}_spatial_median_rate".format(self.ename, self.bname)
+
+ np.testing.assert_allclose(
+ out_smr[outcol].values[:5], self.smr, rtol=RTOL, atol=ATOL
+ )
+ np.testing.assert_allclose(
+ out_smr_w[outcol].values[:5], self.smr_w, rtol=RTOL, atol=ATOL
+ )
+ np.testing.assert_allclose(
+ out_smr2[outcol].values[:5], self.smr2, rtol=RTOL, atol=ATOL
+ )
+
+ @unittest.skipIf(PANDAS_EXTINCT, "missing pandas")
def test_Spatial_Smoother_multicol(self):
"""
test that specifying multiple columns works correctly. Since the
function is shared over all spatial smoothers, we can only test one.
"""
- enames = [self.ename, 'SID79']
- bnames = [self.bname, 'BIR79']
+ enames = [self.ename, "SID79"]
+ bnames = [self.bname, "BIR79"]
out_df = sm.Spatial_Median_Rate.by_col(self.df, enames, bnames, self.w)
- outcols = ['{}-{}_spatial_median_rate'.format(e,b)
- for e,b in zip(enames, bnames)]
- smr79 = np.array([0.00122129, 0.00176924, 0.00176924,
- 0.00240964, 0.00272035])
+ outcols = [
+ "{}-{}_spatial_median_rate".format(e, b) for e, b in zip(enames, bnames)
+ ]
+ smr79 = np.array(
+ [0.00122129, 0.00176924, 0.00176924, 0.00240964, 0.00272035]
+ )
answers = [self.smr, smr79]
-        for col, answer in zip(outcols, answer):
-            self.assertIn(out_df.columns, col)
+        for col, answer in zip(outcols, answers):
+            self.assertIn(col, out_df.columns)
- np.testing.assert_allclose(out_df[col].values[:5], answer,
- rtol=RTOL, atol=ATOL)
+ np.testing.assert_allclose(
+ out_df[col].values[:5], answer, rtol=RTOL, atol=ATOL
+ )
- @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas')
+ @unittest.skipIf(PANDAS_EXTINCT, "missing pandas")
def test_Smoother_multicol(self):
"""
test that non-spatial smoothers work with multicolumn queries
"""
- enames = [self.ename, 'SID79']
- bnames = [self.bname, 'BIR79']
+ enames = [self.ename, "SID79"]
+ bnames = [self.bname, "BIR79"]
out_df = sm.Excess_Risk.by_col(self.df, enames, bnames)
- outcols = ['{}-{}_excess_risk'.format(e,b)
- for e,b in zip(enames, bnames)]
- er79 = np.array([0.000000, 2.796607, 0.8383863,
- 1.217479, 0.943811])
+ outcols = ["{}-{}_excess_risk".format(e, b) for e, b in zip(enames, bnames)]
+ er79 = np.array([0.000000, 2.796607, 0.8383863, 1.217479, 0.943811])
answers = [self.er, er79]
-        for col, answer in zip(outcols, answer):
-            self.assertIn(out_df.columns, col)
+        for col, answer in zip(outcols, answers):
+            self.assertIn(col, out_df.columns)
- np.testing.assert_allclose(out_df[col].values[:5], answer,
- rtol=RTOL, atol=ATOL)
+ np.testing.assert_allclose(
+ out_df[col].values[:5], answer, rtol=RTOL, atol=ATOL
+ )
+
class TestHB(unittest.TestCase):
def setUp(self):
- sids = libpysal.io.open(libpysal.examples.get_path('sids2.shp'), 'r')
+ sids = libpysal.io.open(libpysal.examples.get_path("sids2.shp"), "r")
self.sids = sids
self.d = np.array([i.centroid for i in sids])
self.w = KNN.from_array(self.d, k=5)
if not self.w.id_order_set:
self.w.id_order = self.w.id_order
- sids_db = libpysal.io.open(libpysal.examples.get_path('sids2.dbf'), 'r')
+ sids_db = libpysal.io.open(libpysal.examples.get_path("sids2.dbf"), "r")
self.b, self.e = np.array(sids_db[:, 8]), np.array(sids_db[:, 9])
- self.sids_hb_rr5 = np.array([0.00075586, 0.,
- 0.0008285, 0.0018315, 0.00498891])
- self.sids_hb_r2r5 = np.array([0.0008285, 0.00084331,
- 0.00086896, 0.0018315, 0.00498891])
- self.sids_hb_r3r5 = np.array([0.00091659, 0.,
- 0.00156838, 0.0018315, 0.00498891])
+ self.sids_hb_rr5 = np.array([0.00075586, 0.0, 0.0008285, 0.0018315, 0.00498891])
+ self.sids_hb_r2r5 = np.array(
+ [0.0008285, 0.00084331, 0.00086896, 0.0018315, 0.00498891]
+ )
+ self.sids_hb_r3r5 = np.array(
+ [0.00091659, 0.0, 0.00156838, 0.0018315, 0.00498891]
+ )
if not PANDAS_EXTINCT:
self.df = sids_db.to_df()
- self.ename = 'SID74'
- self.bname = 'BIR74'
- self.enames = [self.ename, 'SID79']
- self.bnames = [self.bname, 'BIR79']
- self.sids79r = np.array([.000563, .001659, .001879,
- .002410, .002720])
- @unittest.skip('Deprecated')
+ self.ename = "SID74"
+ self.bname = "BIR74"
+ self.enames = [self.ename, "SID79"]
+ self.bnames = [self.bname, "BIR79"]
+ self.sids79r = np.array([0.000563, 0.001659, 0.001879, 0.002410, 0.002720])
+
+ @unittest.skip("Deprecated")
def test_Headbanging_Triples(self):
ht = sm.Headbanging_Triples(self.d, self.w)
self.assertEqual(len(ht.triples), len(self.d))
ht2 = sm.Headbanging_Triples(self.d, self.w, edgecor=True)
- self.assertTrue(hasattr(ht2, 'extra'))
+ self.assertTrue(hasattr(ht2, "extra"))
self.assertEqual(len(ht2.triples), len(self.d))
htr = sm.Headbanging_Median_Rate(self.e, self.b, ht2, iteration=5)
self.assertEqual(len(htr.r), len(self.e))
for i in htr.r:
self.assertTrue(i is not None)
- @unittest.skip('Deprecated')
+ @unittest.skip("Deprecated")
def test_Headbanging_Median_Rate(self):
s_ht = sm.Headbanging_Triples(self.d, self.w, k=5)
sids_hb_r = sm.Headbanging_Median_Rate(self.e, self.b, s_ht)
@@ -304,39 +322,45 @@ def test_Headbanging_Median_Rate(self):
sids_hb_r3 = sm.Headbanging_Median_Rate(self.e, self.b, s_ht, aw=self.b)
np.testing.assert_array_almost_equal(self.sids_hb_r3r5, sids_hb_r3.r[:5])
- #@unittest.skipIf(PANDAS_EXTINCT, 'missing pandas')
- @unittest.skip('Deprecated')
+ # @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas')
+ @unittest.skip("Deprecated")
def test_Headbanging_Median_Rate_tabular(self):
# test that dataframe columns are treated correctly
s_ht = sm.Headbanging_Triples(self.d, self.w, k=5)
- sids_hb_r = sm.Headbanging_Median_Rate(self.df[self.ename],
- self.df[self.bname], s_ht)
+ sids_hb_r = sm.Headbanging_Median_Rate(
+ self.df[self.ename], self.df[self.bname], s_ht
+ )
self.assertIsInstance(sids_hb_r.r, np.ndarray)
np.testing.assert_array_almost_equal(self.sids_hb_rr5, sids_hb_r.r[:5])
- sids_hb_r2 = sm.Headbanging_Median_Rate(self.df[self.ename],
- self.df[self.bname], s_ht,
- iteration=5)
+ sids_hb_r2 = sm.Headbanging_Median_Rate(
+ self.df[self.ename], self.df[self.bname], s_ht, iteration=5
+ )
self.assertIsInstance(sids_hb_r2.r, np.ndarray)
np.testing.assert_array_almost_equal(self.sids_hb_r2r5, sids_hb_r2.r[:5])
- sids_hb_r3 = sm.Headbanging_Median_Rate(self.df[self.ename],
- self.df[self.bname], s_ht,
- aw=self.df[self.bname])
+ sids_hb_r3 = sm.Headbanging_Median_Rate(
+ self.df[self.ename], self.df[self.bname], s_ht, aw=self.df[self.bname]
+ )
self.assertIsInstance(sids_hb_r3.r, np.ndarray)
np.testing.assert_array_almost_equal(self.sids_hb_r3r5, sids_hb_r3.r[:5])
- #test that the by col on multiple names works correctly
- sids_hr_df = sm.Headbanging_Median_Rate.by_col(self.df, self.enames,
- self.bnames, w=self.w)
- outcols = ['{}-{}_headbanging_median_rate'.format(e,b) for e,b in
- zip(self.enames, self.bnames)]
+ # test that the by col on multiple names works correctly
+ sids_hr_df = sm.Headbanging_Median_Rate.by_col(
+ self.df, self.enames, self.bnames, w=self.w
+ )
+ outcols = [
+ "{}-{}_headbanging_median_rate".format(e, b)
+ for e, b in zip(self.enames, self.bnames)
+ ]
for col, answer in zip(outcols, [self.sids_hb_rr5, self.sids79r]):
this_col = sids_hr_df[col].values
self.assertIsInstance(this_col, np.ndarray)
- np.testing.assert_allclose(sids_hr_df[col][:5], answer,
- rtol=RTOL, atol=ATOL*10)
+ np.testing.assert_allclose(
+ sids_hr_df[col][:5], answer, rtol=RTOL, atol=ATOL * 10
+ )
+
class TestKernel_AgeAdj_SM(unittest.TestCase):
def setUp(self):
@@ -345,8 +369,7 @@ def setUp(self):
self.e1 = np.array([10, 8, 1, 4, 3, 5, 4, 3, 2, 1, 5, 3])
self.b1 = np.array([100, 90, 15, 30, 25, 20, 30, 20, 80, 80, 90, 60])
self.s = np.array([98, 88, 15, 29, 20, 23, 33, 25, 76, 80, 89, 66])
- self.points = [(
- 10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
+ self.points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]
self.kw = Kernel(self.points)
self.points1 = np.array(self.points) + 5
self.points1 = np.vstack((self.points1, self.points))
@@ -355,43 +378,88 @@ def setUp(self):
self.kw.id_order = list(range(0, len(self.points)))
if not PANDAS_EXTINCT:
import pandas as pd
+
dfa = np.array([self.e, self.b]).T
dfb = np.array([self.e1, self.b1, self.s]).T
- self.dfa = pd.DataFrame(dfa, columns=['e','b'])
- self.dfb = pd.DataFrame(dfb, columns=['e', 'b', 's'])
-
- #answers
- self.kernel_exp = [0.10543301, 0.0858573, 0.08256196, 0.09884584,
- 0.04756872, 0.04845298]
- self.ageadj_exp = [0.10519625, 0.08494318, 0.06440072, 0.06898604,
- 0.06952076, 0.05020968]
- self.disk_exp = [0.12222222000000001, 0.10833333, 0.08055556,
- 0.08944444, 0.09944444, 0.09351852]
- self.sf_exp = np.array([ 0.111111, 0.111111, 0.085106, 0.076923])
+ self.dfa = pd.DataFrame(dfa, columns=["e", "b"])
+ self.dfb = pd.DataFrame(dfb, columns=["e", "b", "s"])
+
+ # answers
+ self.kernel_exp = [
+ 0.10543301,
+ 0.0858573,
+ 0.08256196,
+ 0.09884584,
+ 0.04756872,
+ 0.04845298,
+ ]
+ self.ageadj_exp = [
+ 0.10519625,
+ 0.08494318,
+ 0.06440072,
+ 0.06898604,
+ 0.06952076,
+ 0.05020968,
+ ]
+ self.disk_exp = [
+ 0.12222222000000001,
+ 0.10833333,
+ 0.08055556,
+ 0.08944444,
+ 0.09944444,
+ 0.09351852,
+ ]
+ self.sf_exp = np.array([0.111111, 0.111111, 0.085106, 0.076923])
def test_Kernel_Smoother(self):
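+        # kernel-smoothed rate: r_i = sum_j K(i, j) e_j / sum_j K(i, j) b_j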
kr = sm.Kernel_Smoother(self.e, self.b, self.kw)
np.testing.assert_allclose(kr.r.flatten(), self.kernel_exp)
- @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas')
+ @unittest.skipIf(PANDAS_EXTINCT, "missing pandas")
def test_Kernel_Smoother_tabular(self):
dfa, dfb = self.dfa, self.dfb
- kr = sm.Kernel_Smoother(dfa['e'], dfa['b'], self.kw)
+ kr = sm.Kernel_Smoother(dfa["e"], dfa["b"], self.kw)
-        np.testing.assert_allclose(kr.r.flatten(), kernel_exp)
+        np.testing.assert_allclose(kr.r.flatten(), self.kernel_exp)
- kr = sm.Kernel_Smoother.by_col(dfa, 'e', 'b', w=self.kw)
- colname = 'e_b_kernel_smoother'
+ kr = sm.Kernel_Smoother.by_col(dfa, "e", "b", w=self.kw)
+ colname = "e_b_kernel_smoother"
-        np.testing.assert_allclose(kr[colname].values, kernel_exp)
+        np.testing.assert_allclose(kr[colname].values, self.kernel_exp)
- kr = sm.Kernel_Smoother.by_col(dfb, ['e', 's'], 'b', w=self.kw)
- outcols = ['{}-b_kernel_smoother'.format(l) for l in ['e','s']]
-
- exp_eb = np.array([ 0.08276363, 0.08096262, 0.03636364, 0.0704302 ,
- 0.07996067, 0.1287226 , 0.09831286, 0.0952105 ,
- 0.02857143, 0.06671039, 0.07129231, 0.08078792])
- exp_sb = np.array([ 1.00575463, 0.99597005, 0.96363636, 0.99440132,
- 0.98468399, 1.07912333, 1.03376267, 1.02759815,
- 0.95428572, 0.99716186, 0.98277235, 1.03906155])
+ kr = sm.Kernel_Smoother.by_col(dfb, ["e", "s"], "b", w=self.kw)
+ outcols = ["{}-b_kernel_smoother".format(l) for l in ["e", "s"]] # noqa E741
+
+ exp_eb = np.array(
+ [
+ 0.08276363,
+ 0.08096262,
+ 0.03636364,
+ 0.0704302,
+ 0.07996067,
+ 0.1287226,
+ 0.09831286,
+ 0.0952105,
+ 0.02857143,
+ 0.06671039,
+ 0.07129231,
+ 0.08078792,
+ ]
+ )
+ exp_sb = np.array(
+ [
+ 1.00575463,
+ 0.99597005,
+ 0.96363636,
+ 0.99440132,
+ 0.98468399,
+ 1.07912333,
+ 1.03376267,
+ 1.02759815,
+ 0.95428572,
+ 0.99716186,
+ 0.98277235,
+ 1.03906155,
+ ]
+ )
for name, answer in zip(outcols, [exp_eb, exp_sb]):
np.testing.assert_allclose(kr[name].values, answer, rtol=RTOL, atol=ATOL)
@@ -399,35 +467,37 @@ def test_Age_Adjusted_Smoother(self):
ar = sm.Age_Adjusted_Smoother(self.e1, self.b1, self.kw, self.s)
np.testing.assert_allclose(ar.r, self.ageadj_exp)
- @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas')
+ @unittest.skipIf(PANDAS_EXTINCT, "missing pandas")
def test_Age_Adjusted_Smoother_tabular(self):
dfb = self.dfb
kr = sm.Age_Adjusted_Smoother(dfb.e, dfb.b, s=dfb.s, w=self.kw)
self.assertIsInstance(kr.r, np.ndarray)
np.testing.assert_allclose(kr.r, self.ageadj_exp)
- kr = sm.Age_Adjusted_Smoother.by_col(dfb, 'e', 'b', w=self.kw, s='s')
- answer = np.array([ 0.10519625, 0.08494318, 0.06440072,
- 0.06898604, 0.06952076, 0.05020968])
- colname = 'e-b_age_adjusted_smoother'
+ kr = sm.Age_Adjusted_Smoother.by_col(dfb, "e", "b", w=self.kw, s="s")
+ answer = np.array(
+ [0.10519625, 0.08494318, 0.06440072, 0.06898604, 0.06952076, 0.05020968]
+ )
+ colname = "e-b_age_adjusted_smoother"
np.testing.assert_allclose(kr[colname].values, answer, rtol=RTOL, atol=ATOL)
def test_Disk_Smoother(self):
- self.kw.transform = 'b'
+ self.kw.transform = "b"
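+        # the binary ("b") transform weights every neighbor inside the
+        # kernel bandwidth equally, turning the kernel into a disk window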
disk = sm.Disk_Smoother(self.e, self.b, self.kw)
np.testing.assert_allclose(disk.r.flatten(), self.disk_exp)
- @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas')
+ @unittest.skipIf(PANDAS_EXTINCT, "missing pandas")
def test_Disk_Smoother_tabular(self):
- self.kw.transform = 'b'
+ self.kw.transform = "b"
dfa = self.dfa
disk = sm.Disk_Smoother(dfa.e, dfa.b, self.kw).r
np.testing.assert_allclose(disk.flatten(), self.disk_exp)
- disk = sm.Disk_Smoother.by_col(dfa, 'e', 'b', self.kw)
- col = 'e-b_disk_smoother'
- np.testing.assert_allclose(disk[col].values, self.disk_exp,
- rtol=RTOL, atol=ATOL)
+ disk = sm.Disk_Smoother.by_col(dfa, "e", "b", self.kw)
+ col = "e-b_disk_smoother"
+ np.testing.assert_allclose(
+ disk[col].values, self.disk_exp, rtol=RTOL, atol=ATOL
+ )
def test_Spatial_Filtering(self):
points = np.array(self.points)
@@ -435,35 +505,56 @@ def test_Spatial_Filtering(self):
sf = sm.Spatial_Filtering(bbox, points, self.e, self.b, 2, 2, r=30)
np.testing.assert_allclose(sf.r, self.sf_exp, rtol=RTOL, atol=ATOL)
- @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas')
+ @unittest.skipIf(PANDAS_EXTINCT, "missing pandas")
-    def test_Kernel_Smoother_tabular(self):
+    def test_Spatial_Filtering_tabular(self):
point_array = np.array(self.points)
- bbox = [[0,0] , [45,45]]
+ bbox = [[0, 0], [45, 45]]
dfa = self.dfa
sf = sm.Spatial_Filtering(bbox, point_array, dfa.e, dfa.b, 2, 2, r=30)
np.testing.assert_allclose(sf.r, self.sf_exp, rtol=RTOL, atol=ATOL)
- dfa['geometry'] = self.points
- sf = sm.Spatial_Filtering.by_col(dfa, 'e', 'b', 3, 3, r=30)
- r_answer = np.array([ 0.07692308, 0.07213115, 0.07213115, 0.07692308,
- 0.07692308, 0.07692308, 0.07692308, 0.07692308,
- 0.07692308])
+ dfa["geometry"] = self.points
+ sf = sm.Spatial_Filtering.by_col(dfa, "e", "b", 3, 3, r=30)
+ r_answer = np.array(
+ [
+ 0.07692308,
+ 0.07213115,
+ 0.07213115,
+ 0.07692308,
+ 0.07692308,
+ 0.07692308,
+ 0.07692308,
+ 0.07692308,
+ 0.07692308,
+ ]
+ )
x_answer = np.array([10.0, 10.0, 10.0, 20.0, 20.0, 20.0, 30.0, 30.0, 30.0])
- y_answer = np.array([10.000000, 16.666667, 23.333333,
- 10.000000, 16.666667, 23.333333,
- 10.000000, 16.666667, 23.333333])
- columns = ['e-b_spatial_filtering_{}'.format(name) for name in ['X', 'Y', 'R']]
+ y_answer = np.array(
+ [
+ 10.000000,
+ 16.666667,
+ 23.333333,
+ 10.000000,
+ 16.666667,
+ 23.333333,
+ 10.000000,
+ 16.666667,
+ 23.333333,
+ ]
+ )
+ columns = ["e-b_spatial_filtering_{}".format(name) for name in ["X", "Y", "R"]]
for col, answer in zip(columns, [x_answer, y_answer, r_answer]):
np.testing.assert_allclose(sf[col].values, answer, rtol=RTOL, atol=ATOL)
+
class TestUtils(unittest.TestCase):
def test_sum_by_n(self):
d = np.array([10, 9, 20, 30])
w = np.array([0.5, 0.1, 0.3, 0.8])
n = 2
- exp_sum = np.array([5.9, 30.])
+ exp_sum = np.array([5.9, 30.0])
np.testing.assert_array_almost_equal(exp_sum, sm.sum_by_n(d, w, n))
def test_standardized_mortality_ratio(self):
@@ -473,8 +564,9 @@ def test_standardized_mortality_ratio(self):
s_b = np.array([1000, 900, 1000, 900, 1000, 900, 1000, 900])
n = 2
exp_smr = np.array([2.48691099, 2.73684211])
- np.testing.assert_array_almost_equal(exp_smr,
- sm.standardized_mortality_ratio(e, b, s_e, s_b, n))
+ np.testing.assert_array_almost_equal(
+ exp_smr, sm.standardized_mortality_ratio(e, b, s_e, s_b, n)
+ )
def test_choynowski(self):
e = np.array([30, 25, 25, 15, 33, 21, 30, 20])
@@ -486,19 +578,24 @@ def test_choynowski(self):
def test_assuncao_rate(self):
e = np.array([30, 25, 25, 15, 33, 21, 30, 20])
b = np.array([100, 100, 110, 90, 100, 90, 110, 90])
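+        # assuncao_rate returns empirical Bayes standardized rates
+        # (Assuncao and Reis, 1999)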
- exp_assuncao = np.array([1.03843594, -0.04099089, -0.56250375,
- -1.73061861])
- np.testing.assert_array_almost_equal(
- exp_assuncao, sm.assuncao_rate(e, b)[:4])
+ exp_assuncao = np.array([1.03843594, -0.04099089, -0.56250375, -1.73061861])
+ np.testing.assert_array_almost_equal(exp_assuncao, sm.assuncao_rate(e, b)[:4])
suite = unittest.TestSuite()
-test_classes = [TestFlatten, TestWMean, TestAgeStd, TestSRate, TestHB,
- TestKernel_AgeAdj_SM, TestUtils]
+test_classes = [
+ TestFlatten,
+ TestWMean,
+ TestAgeStd,
+ TestSRate,
+ TestHB,
+ TestKernel_AgeAdj_SM,
+ TestUtils,
+]
for i in test_classes:
a = unittest.TestLoader().loadTestsFromTestCase(i)
suite.addTest(a)
-if __name__ == '__main__':
+if __name__ == "__main__":
runner = unittest.TextTestRunner()
runner.run(suite)
diff --git a/esda/tests/test_topo.py b/esda/tests/test_topo.py
index b2bfe95a..f4c002ca 100644
--- a/esda/tests/test_topo.py
+++ b/esda/tests/test_topo.py
@@ -1,7 +1,9 @@
-import numpy, copy, pandas
from unittest import TestCase
-from ..topo import prominence, isolation, to_elevation, weights
+import numpy
+import pandas
+
+from ..topo import isolation, prominence, to_elevation, weights
class TopoTester(TestCase):
@@ -71,8 +73,9 @@ def test_isolation_valid(self):
assert numpy.isnan(iso.loc[4, "parent_rank"])
assert (iso.dropna().parent_index == 4).all()
assert (
- iso.sort_values("marks", ascending=False).index
- == iso.sort_values("rank").index
+ iso.sort_values("marks", ascending=False).index == (
+ iso.sort_values("rank").index
+ )
).all()
assert iso.loc[3, "isolation"] == 1.5
assert iso.loc[2, "gap"] == (
@@ -88,8 +91,9 @@ def test_isolation_valid(self):
assert numpy.isnan(iso.loc[3, "parent_index"])
assert (iso.dropna().parent_index == [4, 2, 5, 5, 3]).all()
assert (
- iso.sort_values("marks", ascending=False).index
- == iso.sort_values("rank").index
+ iso.sort_values("marks", ascending=False).index == (
+ iso.sort_values("rank").index
+ )
).all()
assert iso.loc[1, "isolation"] == 1
assert iso.loc[2, "gap"] == (
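
The rewrapped assertions above guard one invariant that is easy to miss in the noise: sorting by descending `marks` must reproduce the ordering given by ascending `rank`. A toy pandas frame (values invented for illustration) makes the check concrete:

    import pandas

    # Hypothetical frame mirroring the columns asserted in test_topo.py.
    iso = pandas.DataFrame({"marks": [3.0, 1.0, 2.0], "rank": [0, 2, 1]})

    assert (
        iso.sort_values("marks", ascending=False).index
        == iso.sort_values("rank").index
    ).all()
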
diff --git a/esda/tests/test_util.py b/esda/tests/test_util.py
index 97e3410c..88d4035d 100644
--- a/esda/tests/test_util.py
+++ b/esda/tests/test_util.py
@@ -1,9 +1,10 @@
import unittest
+
import libpysal
-from .. import util
-from .. import moran
import numpy as np
+from .. import moran, util
+
class Fdr_Tester(unittest.TestCase):
def setUp(self):
diff --git a/esda/topo.py b/esda/topo.py
index ff99b9d9..1c96b41d 100644
--- a/esda/topo.py
+++ b/esda/topo.py
@@ -1,11 +1,12 @@
-import numpy, pandas
+import numpy
+import pandas
+from libpysal import weights
from scipy.spatial import distance
-from sklearn.utils import check_array
from scipy.stats import mode as most_common_value
-from libpysal import weights
+from sklearn.utils import check_array
try:
- from tqdm import tqdm as _tqdm
+ from tqdm import tqdm
HAS_TQDM = True
except ImportError:
@@ -18,27 +19,25 @@ def _passthrough(sequence):
def _resolve_metric(X, coordinates, metric):
"""
- Provide a distance function that you can use to find the distance betwen arbitrary points.
+    Provide a distance function that you can use
+    to find the distance between arbitrary points.
"""
if callable(metric):
distance_func = metric
elif metric.lower() == "haversine":
try:
from numba import autojit
- except:
+ except ImportError:
def autojit(func):
return func
@autojit
def harcdist(p1, p2):
- """ Compute the kernel of haversine"""
+ """Compute the kernel of haversine"""
-            x = numpy.sin(p2[1] - p1[1] / 2) ** 2
+            x = numpy.sin((p2[1] - p1[1]) / 2) ** 2
- y = (
- numpy.cos(p2[1])
- * numpy.cos(p1[1])
- * numpy.sin((p2[0] - p1[0]) / 2) ** 2
- )
+ cosp1, cosp2 = numpy.cos(p1[1]), numpy.cos(p2[1])
+ y = cosp2 * cosp1 * numpy.sin((p2[0] - p1[0]) / 2) ** 2
return 2 * numpy.arcsin(numpy.sqrt(x + y))
distance_func = harcdist
@@ -54,7 +53,7 @@ def harcdist(p1, p2):
)
def lookup_distance(a, b):
- """ Find location of points a,b in X and return precomputed distances"""
+ """Find location of points a,b in X and return precomputed distances"""
(aloc,) = (X == a).all(axis=1).nonzero()
(bloc,) = (X == b).all(axis=1).nonzero()
if (len(aloc) > 1) or (len(bloc) > 1):
@@ -99,8 +98,9 @@ def isolation(
(N, p) array of data to use as input. If p > 1, the "elevation" is computed
using the topo.to_elevation function.
coordinates : numpy.ndarray
- (N,k) array of locations for X to compute distances. If metric='precomputed', this
- should contain the distances from each point to every other point, and k == N.
+ (N,k) array of locations for X to compute distances. If
+ metric='precomputed', this should contain the distances from
+ each point to every other point, and k == N.
metric : string or callable (default: 'euclidean')
name of distance metric in scipy.spatial.distance, or function, that can be
used to compute distances between locations. If 'precomputed', ad-hoc function
@@ -134,10 +134,7 @@ def isolation(
if progressbar and HAS_TQDM:
pbar = tqdm
elif progressbar and (not HAS_TQDM):
- try:
- import tqdm
- except ImportError as e:
- raise ImportError("the tqdm module is required for progressbars")
+ raise ImportError("The `tqdm` module is required for progressbars.")
else:
pbar = _passthrough
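
The hunk above swaps a redundant re-import of `tqdm` for an immediate, clearer failure. The full optional-dependency pattern, sketched standalone using the module-level `HAS_TQDM` flag and `_passthrough` helper visible earlier in this diff (`_get_pbar` is a hypothetical wrapper; the module inlines this logic):

    try:
        from tqdm import tqdm
        HAS_TQDM = True
    except ImportError:
        HAS_TQDM = False

    def _passthrough(sequence):
        return sequence

    def _get_pbar(progressbar):
        # Wrap iterables with tqdm when available; fail early only when a
        # progressbar was explicitly requested but tqdm is not installed.
        if progressbar and HAS_TQDM:
            return tqdm
        elif progressbar:
            raise ImportError("The `tqdm` module is required for progressbars.")
        return _passthrough
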
@@ -202,8 +199,8 @@ def prominence(
Returns
-------
- the prominence of each observation in X, possibly along with the set of saddle points,
- peaks, and/or dominating peak tree.
+ the prominence of each observation in X, possibly along with the
+ set of saddle points, peaks, and/or dominating peak tree.
Notes
-----
@@ -242,10 +239,7 @@ def prominence(
if progressbar and HAS_TQDM:
pbar = tqdm
elif progressbar and (not HAS_TQDM):
- try:
- import tqdm
- except ImportError as e:
- raise ImportError("the tqdm module is required for progressbars")
+ raise ImportError("The `tqdm` module is required for progressbars.")
else:
pbar = _passthrough
@@ -260,9 +254,10 @@ def prominence(
msg = "assessing {} (rank: {}, value: {})".format(this_full_ix, rank, value)
# use the dominating_peak vector. A new obs either has:
- # 1. neighbors whose dominating_peak are all -1 (new peak)
- # 2. neighbors whose dominating_peak are all -1 or an integer (slope of current peak)
- # 3. neighbors whose dominating_peak include at least two integers and any -1 (key col)
+ # Neighbors whose dominating_peak...
+ # 1. ... are all -1 (new peak)
+ # 2. ... are all -1 or an integer (slope of current peak)
+ # 3. ... include at least two integers and any -1 (key col)
_, neighbs = connectivity[this_full_ix].toarray().nonzero()
this_preds = predecessors[neighbs]
@@ -336,23 +331,24 @@ def prominence(
if verbose:
print(
- "--------------------------------------------\nat the {}"
- " iteration:\n{}\n\tpeaks\t{}\n\tprominence\t{}\n\tkey_cols\t{}\n"
- .format(rank, msg, peaks, prominence, key_cols)
+                (
+                    "--------------------------------------------\n"
+                    "at the {rank} iteration:\n{msg}\n\tpeaks\t{peaks}\n"
+                    "\tprominence\t{prominence}\n\tkey_cols\t{key_cols}\n"
+                ).format(
+                    rank=rank,
+                    msg=msg,
+                    peaks=peaks,
+                    prominence=prominence,
+                    key_cols=key_cols,
+                )
)
if gdf is not None:
peakframe = gdf.iloc[peaks]
keycolframe = gdf.iloc[list(key_cols.values())]
- slopeframe = gdf[
- (~(gdf.index.isin(peakframe.index) | gdf.index.isin(keycolframe.index)))
- & mask
- ]
+ _isin_peak = gdf.index.isin(peakframe.index)
+ _isin_keycol = gdf.index.isin(keycolframe.index)
+ slopeframe = gdf[~(_isin_peak | _isin_keycol) & mask]
rest = gdf[~mask]
this_geom = gdf.iloc[[this_full_ix]]
- ax = rest.plot(edgecolor="k", linewidth=0.1, facecolor="lightblue")
- ax = slopeframe.plot(edgecolor="k", linewidth=0.1, facecolor="linen", ax=ax)
- ax = keycolframe.plot(edgecolor="k", linewidth=0.1, facecolor="red", ax=ax)
- ax = peakframe.plot(edgecolor="k", linewidth=0.1, facecolor="yellow", ax=ax)
+        ax = rest.plot(ec="k", lw=0.1, fc="lightblue")
+ ax = slopeframe.plot(ec="k", lw=0.1, fc="linen", ax=ax)
+ ax = keycolframe.plot(ec="k", lw=0.1, fc="red", ax=ax)
+ ax = peakframe.plot(ec="k", lw=0.1, fc="yellow", ax=ax)
ax = this_geom.centroid.plot(ax=ax, color="orange", marker="*")
plt.show()
command = input()
@@ -391,14 +387,16 @@ def to_elevation(X, middle="mean", metric="euclidean"):
X : numpy.ndarray
Array of values for which to compute elevation.
middle : callable or string
- name of function in numpy (or function itself) used to compute the center point of X
+ name of function in numpy (or function itself)
+ used to compute the center point of X
metric : string
- metric to use in `scipy.spatial.distance.cdist` to compute the distance from the center
- of mass to the point.
+ metric to use in `scipy.spatial.distance.cdist` to
+ compute the distance from the center of mass to the point.
Returns
--------
- (N,1)-shaped numpy array containing the "elevation" of each point relative to sea level (zero).
+ (N,1)-shaped numpy array containing the "elevation" of
+ each point relative to sea level (zero).
"""
if X.ndim == 1:
@@ -429,7 +427,7 @@ def _check_connectivity(connectivity_or_coordinates):
3. a set of coordinates that we need to build the delaunay triangulation
and then return the graph.
"""
- from scipy.sparse import issparse, csc_matrix
+ from scipy.sparse import issparse
if issparse(connectivity_or_coordinates):
shape = connectivity_or_coordinates.shape
@@ -446,9 +444,10 @@ def _check_connectivity(connectivity_or_coordinates):
if __name__ == "__main__":
- import geopandas, pandas
- from libpysal import weights, examples
+ import geopandas
import matplotlib.pyplot as plt
+ import pandas # noqa F811
+ from libpysal import examples, weights # noqa F811
from matplotlib import cm
current_cmap = cm.get_cmap()
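
Taken together, the docstring fixes in this file describe a small public surface. A hedged usage sketch: the `(X, coordinates)` order and the `metric="euclidean"` default follow the docstrings above, while the 1-D input and the printed columns are assumptions inferred from the assertions in test_topo.py:

    import numpy
    from esda.topo import isolation, to_elevation

    coordinates = numpy.random.random((50, 2))  # (N, k) locations
    marks = numpy.random.random(50)             # (N,) attribute values

    # (N, 1) "elevation" of each value relative to sea level (zero).
    elevation = to_elevation(marks)

    # Per-observation isolation; the tests read columns such as
    # "isolation", "rank", "gap", and "parent_index".
    iso = isolation(marks, coordinates, metric="euclidean")
    print(iso[["isolation", "rank"]].head())
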
diff --git a/setup.cfg b/setup.cfg
index ac51808d..d93df069 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -5,3 +5,16 @@ versionfile_source = esda/_version.py
versionfile_build = esda/_version.py
tag_prefix = v
parentdir_prefix = esda-
+
+[flake8]
+max_line_length = 88
+exclude = docs/conf.py, versioneer.py, esda/_version.py
+ignore =
+ F403,
+ F405
+
+[black]
+line-length = 88
+
+[isort]
+profile = black
\ No newline at end of file
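
One caveat on the new tooling configuration: flake8 and isort both read `setup.cfg`, but black only discovers its settings in `pyproject.toml`, so the `[black]` section above is inert as written. The equivalent stanza, if the project adds a `pyproject.toml`, would be:

    [tool.black]
    line-length = 88
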
diff --git a/setup.py b/setup.py
index 1181f32a..71c20ac1 100644
--- a/setup.py
+++ b/setup.py
@@ -1,7 +1,9 @@
# coding: utf-8
-from setuptools import setup, find_packages
-from distutils.command.build_py import build_py
import os
+from distutils.command.build_py import build_py
+
+from setuptools import find_packages, setup
+
import versioneer
package = "esda"
diff --git a/versioneer.py b/versioneer.py
index 97130070..e283ecbf 100644
--- a/versioneer.py
+++ b/versioneer.py
@@ -1,4 +1,3 @@
-
# Version: 0.20
"""The Versioneer - like a rocketeer, but for versions.
@@ -303,11 +302,13 @@ def get_root():
setup_py = os.path.join(root, "setup.py")
versioneer_py = os.path.join(root, "versioneer.py")
if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
- err = ("Versioneer was unable to run the project root directory. "
- "Versioneer requires setup.py to be executed from "
- "its immediate directory (like 'python setup.py COMMAND'), "
- "or in a way that lets it use sys.argv[0] to find the root "
- "(like 'python path/to/setup.py COMMAND').")
+ err = (
+ "Versioneer was unable to run the project root directory. "
+ "Versioneer requires setup.py to be executed from "
+ "its immediate directory (like 'python setup.py COMMAND'), "
+ "or in a way that lets it use sys.argv[0] to find the root "
+ "(like 'python path/to/setup.py COMMAND')."
+ )
raise VersioneerBadRootError(err)
try:
# Certain runtime workflows (setup.py install/develop in a setuptools
@@ -320,8 +321,10 @@ def get_root():
me_dir = os.path.normcase(os.path.splitext(my_path)[0])
vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0])
if me_dir != vsr_dir:
- print("Warning: build in %s is using versioneer.py from %s"
- % (os.path.dirname(my_path), versioneer_py))
+ print(
+ "Warning: build in %s is using versioneer.py from %s"
+ % (os.path.dirname(my_path), versioneer_py)
+ )
except NameError:
pass
return root
@@ -367,16 +370,17 @@ class NotThisMethod(Exception):
def register_vcs_handler(vcs, method): # decorator
"""Create decorator to mark a method as the handler of a VCS."""
+
def decorate(f):
"""Store f in HANDLERS[vcs][method]."""
HANDLERS.setdefault(vcs, {})[method] = f
return f
+
return decorate
# pylint:disable=too-many-arguments,consider-using-with # noqa
-def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
- env=None):
+def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None):
"""Call the given command(s)."""
assert isinstance(commands, list)
process = None
@@ -384,10 +388,13 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
try:
dispcmd = str([command] + args)
# remember shell=False, so use git.cmd on windows, not just git
- process = subprocess.Popen([command] + args, cwd=cwd, env=env,
- stdout=subprocess.PIPE,
- stderr=(subprocess.PIPE if hide_stderr
- else None))
+ process = subprocess.Popen(
+ [command] + args,
+ cwd=cwd,
+ env=env,
+ stdout=subprocess.PIPE,
+ stderr=(subprocess.PIPE if hide_stderr else None),
+ )
break
except EnvironmentError:
e = sys.exc_info()[1]
@@ -410,7 +417,9 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
return stdout, process.returncode
-LONG_VERSION_PY['git'] = r'''
+LONG_VERSION_PY[
+ "git"
+] = r'''
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by githubs download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
@@ -1091,7 +1100,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
- tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
+ tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)}
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d
@@ -1100,7 +1109,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
- tags = {r for r in refs if re.search(r'\d', r)}
+ tags = {r for r in refs if re.search(r"\d", r)}
if verbose:
print("discarding '%s', no digits" % ",".join(refs - tags))
if verbose:
@@ -1108,24 +1117,31 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose):
for ref in sorted(tags):
# sorting will prefer e.g. "2.0" over "2.0rc1"
if ref.startswith(tag_prefix):
- r = ref[len(tag_prefix):]
+ r = ref[len(tag_prefix) :]
# Filter out refs that exactly match prefix or that don't start
# with a number once the prefix is stripped (mostly a concern
# when prefix is '')
- if not re.match(r'\d', r):
+ if not re.match(r"\d", r):
continue
if verbose:
print("picking %s" % r)
- return {"version": r,
- "full-revisionid": keywords["full"].strip(),
- "dirty": False, "error": None,
- "date": date}
+ return {
+ "version": r,
+ "full-revisionid": keywords["full"].strip(),
+ "dirty": False,
+ "error": None,
+ "date": date,
+ }
# no suitable tags, so version is "0+unknown", but full hex is still there
if verbose:
print("no suitable tags, using unknown + full revision id")
- return {"version": "0+unknown",
- "full-revisionid": keywords["full"].strip(),
- "dirty": False, "error": "no suitable tags", "date": None}
+ return {
+ "version": "0+unknown",
+ "full-revisionid": keywords["full"].strip(),
+ "dirty": False,
+ "error": "no suitable tags",
+ "date": None,
+ }
@register_vcs_handler("git", "pieces_from_vcs")
@@ -1140,8 +1156,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
if sys.platform == "win32":
GITS = ["git.cmd", "git.exe"]
- _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root,
- hide_stderr=True)
+ _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True)
if rc != 0:
if verbose:
print("Directory %s not under git control" % root)
@@ -1149,10 +1164,19 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
# if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
# if there isn't one, this yields HEX[-dirty] (no NUM)
- describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty",
- "--always", "--long",
- "--match", "%s*" % tag_prefix],
- cwd=root)
+ describe_out, rc = runner(
+ GITS,
+ [
+ "describe",
+ "--tags",
+ "--dirty",
+ "--always",
+ "--long",
+ "--match",
+ "%s*" % tag_prefix,
+ ],
+ cwd=root,
+ )
# --long was added in git-1.5.5
if describe_out is None:
raise NotThisMethod("'git describe' failed")
@@ -1167,8 +1191,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
pieces["short"] = full_out[:7] # maybe improved later
pieces["error"] = None
- branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"],
- cwd=root)
+ branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root)
# --abbrev-ref was added in git-1.6.3
if rc != 0 or branch_name is None:
raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
@@ -1208,17 +1231,16 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
dirty = git_describe.endswith("-dirty")
pieces["dirty"] = dirty
if dirty:
- git_describe = git_describe[:git_describe.rindex("-dirty")]
+ git_describe = git_describe[: git_describe.rindex("-dirty")]
# now we have TAG-NUM-gHEX or HEX
if "-" in git_describe:
# TAG-NUM-gHEX
- mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+ mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe)
if not mo:
# unparseable. Maybe git-describe is misbehaving?
- pieces["error"] = ("unable to parse git-describe output: '%s'"
- % describe_out)
+ pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out
return pieces
# tag
@@ -1227,10 +1249,12 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
if verbose:
fmt = "tag '%s' doesn't start with prefix '%s'"
print(fmt % (full_tag, tag_prefix))
- pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
- % (full_tag, tag_prefix))
+ pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % (
+ full_tag,
+ tag_prefix,
+ )
return pieces
- pieces["closest-tag"] = full_tag[len(tag_prefix):]
+ pieces["closest-tag"] = full_tag[len(tag_prefix) :]
# distance: number of commits since tag
pieces["distance"] = int(mo.group(2))
@@ -1303,15 +1327,21 @@ def versions_from_parentdir(parentdir_prefix, root, verbose):
for _ in range(3):
dirname = os.path.basename(root)
if dirname.startswith(parentdir_prefix):
- return {"version": dirname[len(parentdir_prefix):],
- "full-revisionid": None,
- "dirty": False, "error": None, "date": None}
+ return {
+ "version": dirname[len(parentdir_prefix) :],
+ "full-revisionid": None,
+ "dirty": False,
+ "error": None,
+ "date": None,
+ }
rootdirs.append(root)
root = os.path.dirname(root) # up a level
if verbose:
- print("Tried directories %s but none started with prefix %s" %
- (str(rootdirs), parentdir_prefix))
+ print(
+ "Tried directories %s but none started with prefix %s"
+ % (str(rootdirs), parentdir_prefix)
+ )
raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
@@ -1340,11 +1370,13 @@ def versions_from_file(filename):
contents = f.read()
except EnvironmentError:
raise NotThisMethod("unable to read _version.py")
- mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON",
- contents, re.M | re.S)
+ mo = re.search(
+ r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S
+ )
if not mo:
- mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON",
- contents, re.M | re.S)
+ mo = re.search(
+ r"version_json = '''\r\n(.*)''' # END VERSION_JSON", contents, re.M | re.S
+ )
if not mo:
raise NotThisMethod("no version_json in _version.py")
return json.loads(mo.group(1))
@@ -1353,8 +1385,7 @@ def versions_from_file(filename):
def write_to_version_file(filename, versions):
"""Write the given version number to the given _version.py file."""
os.unlink(filename)
- contents = json.dumps(versions, sort_keys=True,
- indent=1, separators=(",", ": "))
+ contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": "))
with open(filename, "w") as f:
f.write(SHORT_VERSION_PY % contents)
@@ -1386,8 +1417,7 @@ def render_pep440(pieces):
rendered += ".dirty"
else:
# exception #1
- rendered = "0+untagged.%d.g%s" % (pieces["distance"],
- pieces["short"])
+ rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
return rendered
@@ -1416,8 +1446,7 @@ def render_pep440_branch(pieces):
rendered = "0"
if pieces["branch"] != "master":
rendered += ".dev0"
- rendered += "+untagged.%d.g%s" % (pieces["distance"],
- pieces["short"])
+ rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
return rendered
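
For a concrete sense of what these render helpers emit, a toy evaluation of the fallback branch reformatted just above (the `pieces` values are invented):

    # Invented "pieces" mimicking an untagged, dirty tree on a topic branch.
    pieces = {"distance": 14, "short": "a1b2c3d", "dirty": True, "branch": "fix"}

    rendered = "0"
    if pieces["branch"] != "master":
        rendered += ".dev0"
    rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
    if pieces["dirty"]:
        rendered += ".dirty"
    print(rendered)  # 0.dev0+untagged.14.ga1b2c3d.dirty
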
@@ -1560,11 +1589,13 @@ def render_git_describe_long(pieces):
def render(pieces, style):
"""Render the given version pieces into the requested style."""
if pieces["error"]:
- return {"version": "unknown",
- "full-revisionid": pieces.get("long"),
- "dirty": None,
- "error": pieces["error"],
- "date": None}
+ return {
+ "version": "unknown",
+ "full-revisionid": pieces.get("long"),
+ "dirty": None,
+ "error": pieces["error"],
+ "date": None,
+ }
if not style or style == "default":
style = "pep440" # the default
@@ -1588,9 +1619,13 @@ def render(pieces, style):
else:
raise ValueError("unknown style '%s'" % style)
- return {"version": rendered, "full-revisionid": pieces["long"],
- "dirty": pieces["dirty"], "error": None,
- "date": pieces.get("date")}
+ return {
+ "version": rendered,
+ "full-revisionid": pieces["long"],
+ "dirty": pieces["dirty"],
+ "error": None,
+ "date": pieces.get("date"),
+ }
class VersioneerBadRootError(Exception):
@@ -1613,8 +1648,9 @@ def get_versions(verbose=False):
handlers = HANDLERS.get(cfg.VCS)
assert handlers, "unrecognized VCS '%s'" % cfg.VCS
verbose = verbose or cfg.verbose
- assert cfg.versionfile_source is not None, \
- "please set versioneer.versionfile_source"
+ assert (
+ cfg.versionfile_source is not None
+ ), "please set versioneer.versionfile_source"
assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix"
versionfile_abs = os.path.join(root, cfg.versionfile_source)
@@ -1668,9 +1704,13 @@ def get_versions(verbose=False):
if verbose:
print("unable to compute version")
- return {"version": "0+unknown", "full-revisionid": None,
- "dirty": None, "error": "unable to compute version",
- "date": None}
+ return {
+ "version": "0+unknown",
+ "full-revisionid": None,
+ "dirty": None,
+ "error": "unable to compute version",
+ "date": None,
+ }
def get_version():
@@ -1723,6 +1763,7 @@ def run(self):
print(" date: %s" % vers.get("date"))
if vers["error"]:
print(" error: %s" % vers["error"])
+
cmds["version"] = cmd_version
# we override "build_py" in both distutils and setuptools
@@ -1741,8 +1782,8 @@ def run(self):
# setup.py egg_info -> ?
# we override different "build_py" commands for both environments
- if 'build_py' in cmds:
- _build_py = cmds['build_py']
+ if "build_py" in cmds:
+ _build_py = cmds["build_py"]
elif "setuptools" in sys.modules:
from setuptools.command.build_py import build_py as _build_py
else:
@@ -1757,14 +1798,14 @@ def run(self):
# now locate _version.py in the new build/ directory and replace
# it with an updated value
if cfg.versionfile_build:
- target_versionfile = os.path.join(self.build_lib,
- cfg.versionfile_build)
+ target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build)
print("UPDATING %s" % target_versionfile)
write_to_version_file(target_versionfile, versions)
+
cmds["build_py"] = cmd_build_py
- if 'build_ext' in cmds:
- _build_ext = cmds['build_ext']
+ if "build_ext" in cmds:
+ _build_ext = cmds["build_ext"]
elif "setuptools" in sys.modules:
from setuptools.command.build_ext import build_ext as _build_ext
else:
@@ -1784,14 +1825,15 @@ def run(self):
return
# now locate _version.py in the new build/ directory and replace
# it with an updated value
- target_versionfile = os.path.join(self.build_lib,
- cfg.versionfile_build)
+ target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build)
print("UPDATING %s" % target_versionfile)
write_to_version_file(target_versionfile, versions)
+
cmds["build_ext"] = cmd_build_ext
if "cx_Freeze" in sys.modules: # cx_freeze enabled?
from cx_Freeze.dist import build_exe as _build_exe
+
# nczeczulin reports that py2exe won't like the pep440-style string
# as FILEVERSION, but it can be used for PRODUCTVERSION, e.g.
# setup(console=[{
@@ -1812,17 +1854,21 @@ def run(self):
os.unlink(target_versionfile)
with open(cfg.versionfile_source, "w") as f:
LONG = LONG_VERSION_PY[cfg.VCS]
- f.write(LONG %
- {"DOLLAR": "$",
- "STYLE": cfg.style,
- "TAG_PREFIX": cfg.tag_prefix,
- "PARENTDIR_PREFIX": cfg.parentdir_prefix,
- "VERSIONFILE_SOURCE": cfg.versionfile_source,
- })
+ f.write(
+ LONG
+ % {
+ "DOLLAR": "$",
+ "STYLE": cfg.style,
+ "TAG_PREFIX": cfg.tag_prefix,
+ "PARENTDIR_PREFIX": cfg.parentdir_prefix,
+ "VERSIONFILE_SOURCE": cfg.versionfile_source,
+ }
+ )
+
cmds["build_exe"] = cmd_build_exe
del cmds["build_py"]
- if 'py2exe' in sys.modules: # py2exe enabled?
+ if "py2exe" in sys.modules: # py2exe enabled?
from py2exe.distutils_buildexe import py2exe as _py2exe
class cmd_py2exe(_py2exe):
@@ -1838,18 +1884,22 @@ def run(self):
os.unlink(target_versionfile)
with open(cfg.versionfile_source, "w") as f:
LONG = LONG_VERSION_PY[cfg.VCS]
- f.write(LONG %
- {"DOLLAR": "$",
- "STYLE": cfg.style,
- "TAG_PREFIX": cfg.tag_prefix,
- "PARENTDIR_PREFIX": cfg.parentdir_prefix,
- "VERSIONFILE_SOURCE": cfg.versionfile_source,
- })
+ f.write(
+ LONG
+ % {
+ "DOLLAR": "$",
+ "STYLE": cfg.style,
+ "TAG_PREFIX": cfg.tag_prefix,
+ "PARENTDIR_PREFIX": cfg.parentdir_prefix,
+ "VERSIONFILE_SOURCE": cfg.versionfile_source,
+ }
+ )
+
cmds["py2exe"] = cmd_py2exe
# we override different "sdist" commands for both environments
- if 'sdist' in cmds:
- _sdist = cmds['sdist']
+ if "sdist" in cmds:
+ _sdist = cmds["sdist"]
elif "setuptools" in sys.modules:
from setuptools.command.sdist import sdist as _sdist
else:
@@ -1874,8 +1924,10 @@ def make_release_tree(self, base_dir, files):
# updated value
target_versionfile = os.path.join(base_dir, cfg.versionfile_source)
print("UPDATING %s" % target_versionfile)
- write_to_version_file(target_versionfile,
- self._versioneer_generated_versions)
+ write_to_version_file(
+ target_versionfile, self._versioneer_generated_versions
+ )
+
cmds["sdist"] = cmd_sdist
return cmds
@@ -1935,11 +1987,13 @@ def do_setup():
root = get_root()
try:
cfg = get_config_from_root(root)
- except (EnvironmentError, configparser.NoSectionError,
- configparser.NoOptionError) as e:
+ except (
+ EnvironmentError,
+ configparser.NoSectionError,
+ configparser.NoOptionError,
+ ) as e:
if isinstance(e, (EnvironmentError, configparser.NoSectionError)):
- print("Adding sample versioneer config to setup.cfg",
- file=sys.stderr)
+ print("Adding sample versioneer config to setup.cfg", file=sys.stderr)
with open(os.path.join(root, "setup.cfg"), "a") as f:
f.write(SAMPLE_CONFIG)
print(CONFIG_ERROR, file=sys.stderr)
@@ -1948,15 +2002,18 @@ def do_setup():
print(" creating %s" % cfg.versionfile_source)
with open(cfg.versionfile_source, "w") as f:
LONG = LONG_VERSION_PY[cfg.VCS]
- f.write(LONG % {"DOLLAR": "$",
- "STYLE": cfg.style,
- "TAG_PREFIX": cfg.tag_prefix,
- "PARENTDIR_PREFIX": cfg.parentdir_prefix,
- "VERSIONFILE_SOURCE": cfg.versionfile_source,
- })
-
- ipy = os.path.join(os.path.dirname(cfg.versionfile_source),
- "__init__.py")
+ f.write(
+ LONG
+ % {
+ "DOLLAR": "$",
+ "STYLE": cfg.style,
+ "TAG_PREFIX": cfg.tag_prefix,
+ "PARENTDIR_PREFIX": cfg.parentdir_prefix,
+ "VERSIONFILE_SOURCE": cfg.versionfile_source,
+ }
+ )
+
+ ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py")
if os.path.exists(ipy):
try:
with open(ipy, "r") as f:
@@ -2004,8 +2061,10 @@ def do_setup():
else:
print(" 'versioneer.py' already in MANIFEST.in")
if cfg.versionfile_source not in simple_includes:
- print(" appending versionfile_source ('%s') to MANIFEST.in" %
- cfg.versionfile_source)
+ print(
+ " appending versionfile_source ('%s') to MANIFEST.in"
+ % cfg.versionfile_source
+ )
with open(manifest_in, "a") as f:
f.write("include %s\n" % cfg.versionfile_source)
else: