diff --git a/docs/conf.py b/docs/conf.py index 79ce90f1..dd5329cc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,15 +16,15 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # -import sys, os -import sphinx_bootstrap_theme - +import os +import sys -sys.path.insert(0, os.path.abspath('../')) +import sphinx_bootstrap_theme -#import your package to obtain the version info to display on the docs website -import esda +sys.path.insert(0, os.path.abspath("../")) +# import your package to obtain the version info to display on the docs website +import esda # noqa E402 # -- General configuration ------------------------------------------------ @@ -34,43 +34,44 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = [#'sphinx_gallery.gen_gallery', - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.viewcode', - 'sphinxcontrib.bibtex', - 'sphinx.ext.mathjax', - 'sphinx.ext.doctest', - 'sphinx.ext.intersphinx', - 'numpydoc', - 'matplotlib.sphinxext.plot_directive', - 'nbsphinx'] +extensions = [ # 'sphinx_gallery.gen_gallery', + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.viewcode", + "sphinxcontrib.bibtex", + "sphinx.ext.mathjax", + "sphinx.ext.doctest", + "sphinx.ext.intersphinx", + "numpydoc", + "matplotlib.sphinxext.plot_directive", + "nbsphinx", +] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. project = "esda" # string of your project name, for example, 'giddy' -copyright = '2018, pysal developers' -author = 'pysal developers' +copyright = "2018, pysal developers" +author = "pysal developers" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The full version. -version = esda.__version__ #should replace it with your PACKAGE_NAME -release = esda.__version__ #should replace it with your PACKAGE_NAME +version = esda.__version__ # should replace it with your PACKAGE_NAME +release = esda.__version__ # should replace it with your PACKAGE_NAME # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -82,10 +83,10 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'tests/*'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "tests/*"] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False @@ -96,13 +97,14 @@ # a list of builtin themes. 
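# ---------------------------------------------------------------------------
# Editorial aside (illustrative only, not part of the patch): the import
# shuffle above exists so ``esda`` can be imported from the repository root at
# docs-build time, letting ``version``/``release`` come straight from the
# package.  A minimal standalone check of that assumption, run from ``docs/``:
import os
import sys

sys.path.insert(0, os.path.abspath("../"))

import esda  # noqa: E402

print("Docs will report version:", esda.__version__)
# ---------------------------------------------------------------------------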
# # html_theme = 'alabaster' -html_theme = 'bootstrap' +html_theme = "bootstrap" html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() html_title = "%s v%s Manual" % (project, version) -# (Optional) Logo of your package. Should be small enough to fit the navbar (ideally 24x24). +# (Optional) Logo of your package. +# Should be small enough to fit the navbar (ideally 24x24). # Path should be relative to the ``_static`` files directory. -#html_logo = "_static/images/package_logo.jpg" +# html_logo = "_static/images/package_logo.jpg" # (Optional) PySAL favicon html_favicon = "_static/images/pysal_favicon.ico" @@ -113,27 +115,20 @@ # documentation. # html_theme_options = { - # Navigation bar title. (Default: ``project`` value) - 'navbar_title': "esda", # string of your project name, for example, 'giddy' - + "navbar_title": "esda", # string of your project name, for example, 'giddy' # Render the next and previous page links in navbar. (Default: true) - 'navbar_sidebarrel': False, - + "navbar_sidebarrel": False, # Render the current pages TOC in the navbar. (Default: true) - #'navbar_pagenav': True, - #'navbar_pagenav': False, - + # 'navbar_pagenav': True, + # 'navbar_pagenav': False, # No sidebar - 'nosidebar': True, - + "nosidebar": True, # Tab name for the current pages TOC. (Default: "Page") - #'navbar_pagenav_name': "Page", - + # 'navbar_pagenav_name': "Page", # Global TOC depth for "site" navbar tab. (Default: 1) # Switching to -1 shows all levels. - 'globaltoc_depth': 2, - + "globaltoc_depth": 2, # Include hidden TOCs in Site navbar? # # Note: If this is "false", you cannot have mixed ``:hidden:`` and @@ -141,54 +136,46 @@ # will break. # # Values: "true" (default) or "false" - 'globaltoc_includehidden': "true", - + "globaltoc_includehidden": "true", # HTML navbar class (Default: "navbar") to attach to
element. # For black navbar, do "navbar navbar-inverse" - #'navbar_class': "navbar navbar-inverse", - + # 'navbar_class': "navbar navbar-inverse", # Fix navigation bar to top of page? # Values: "true" (default) or "false" - 'navbar_fixed_top': "true", - - + "navbar_fixed_top": "true", # Location of link to source. # Options are "nav" (default), "footer" or anything else to exclude. - 'source_link_position': 'footer', - + "source_link_position": "footer", # Bootswatch (http://bootswatch.com/) theme. # # Options are nothing (default) or the name of a valid theme # such as "amelia" or "cosmo", "yeti", "flatly". - 'bootswatch_theme': "yeti", - + "bootswatch_theme": "yeti", # Choose Bootstrap version. # Values: "3" (default) or "2" (in quotes) - 'bootstrap_version': "3", - + "bootstrap_version": "3", # Navigation bar menu - 'navbar_links': [ - ("Installation", "installation"), - ("Tutorial", "tutorial"), - ("API", "api"), - ("References", "references"), - ], - + "navbar_links": [ + ("Installation", "installation"), + ("Tutorial", "tutorial"), + ("API", "api"), + ("References", "references"), + ], } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # html_sidebars = {'sidebar': ['localtoc.html', 'sourcelink.html', 'searchbox.html']} # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. -htmlhelp_basename = 'esda'+'doc' +htmlhelp_basename = "esda" + "doc" # -- Options for LaTeX output --------------------------------------------- @@ -197,15 +184,12 @@ # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -215,8 +199,7 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'esda.tex', u'esda Documentation', - u'pysal developers', 'manual'), + (master_doc, "esda.tex", "esda Documentation", "pysal developers", "manual"), ] @@ -224,10 +207,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'esda', u'esda Documentation', - [author], 1) -] +man_pages = [(master_doc, "esda", "esda Documentation", [author], 1)] # -- Options for Texinfo output ------------------------------------------- @@ -236,9 +216,15 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'esda', u'esda Documentation', - author, 'esda developers', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "esda", + "esda Documentation", + author, + "esda developers", + "One line description of project.", + "Miscellaneous", + ), ] @@ -264,22 +250,34 @@ def setup(app): # Example configuration for intersphinx: refer to the Python standard library. 
intersphinx_mapping = { - 'python': ('https://docs.python.org/3/', None), - 'numpy': ('https://docs.scipy.org/doc/numpy', None), - 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), - 'geopandas': ('https://geopandas.readthedocs.io/en/latest/', None), - 'sklearn': ('https://scikit-learn.org/stable/', None), - 'giddy': ('https://giddy.readthedocs.io/en/latest/', None), - 'libpysal': ('https://pysal.org/libpysal/', None), - 'esda': ('https://esda.readthedocs.io/en/latest/', None), - 'region': ('https://region.readthedocs.io/en/latest/', None), - 'hdbscan': ('https://hdbscan.readthedocs.io/en/latest/', None) - + "python": ("https://docs.python.org/3/", None), + "numpy": ("https://docs.scipy.org/doc/numpy", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), + "geopandas": ("https://geopandas.readthedocs.io/en/latest/", None), + "sklearn": ("https://scikit-learn.org/stable/", None), + "giddy": ("https://giddy.readthedocs.io/en/latest/", None), + "libpysal": ("https://pysal.org/libpysal/", None), + "esda": ("https://esda.readthedocs.io/en/latest/", None), + "region": ("https://region.readthedocs.io/en/latest/", None), + "hdbscan": ("https://hdbscan.readthedocs.io/en/latest/", None), } -numpydoc_xref_ignore = {'type', 'optional', 'default', 'shape', 'fitted', 'instance', - 'cluster', 'of', 'or', 'if', 'using', 'otherwise', 'required', - 'from'} +numpydoc_xref_ignore = { + "type", + "optional", + "default", + "shape", + "fitted", + "instance", + "cluster", + "of", + "or", + "if", + "using", + "otherwise", + "required", + "from", +} # This is processed by Jinja2 and inserted before each notebook @@ -306,7 +304,7 @@ def setup(app): \nbsphinxstartnotebook{\scriptsize\noindent\strut \textcolor{gray}{The following section was generated from \sphinxcode{\sphinxupquote{\strut {{ docname | escape_latex }}}} \dotfill}} -""" +""" # noqa E501 # This is processed by Jinja2 and inserted after each notebook nbsphinx_epilog = r""" diff --git a/esda/__init__.py b/esda/__init__.py index f12d3e4f..802c0fc8 100644 --- a/esda/__init__.py +++ b/esda/__init__.py @@ -3,40 +3,37 @@ ================================================= """ -from . import adbscan -from .gamma import Gamma -from .geary import Geary -from .geary_local import Geary_Local -from .geary_local_mv import Geary_Local_MV -from .getisord import G, G_Local -from .join_counts import Join_Counts -from .join_counts_local import Join_Counts_Local -from .join_counts_local_bv import Join_Counts_Local_BV -from .join_counts_local_mv import Join_Counts_Local_MV -from .lee import Spatial_Pearson, Spatial_Pearson_Local -from .losh import LOSH -from .map_comparison import ( - external_entropy, +from . 
import _version, adbscan, shape # noqa F401 +from .gamma import Gamma # noqa F401 +from .geary import Geary # noqa F401 +from .geary_local import Geary_Local # noqa F401 +from .geary_local_mv import Geary_Local_MV # noqa F401 +from .getisord import G, G_Local # noqa F401 +from .join_counts import Join_Counts # noqa F401 +from .join_counts_local import Join_Counts_Local # noqa F401 +from .join_counts_local_bv import Join_Counts_Local_BV # noqa F401 +from .join_counts_local_mv import Join_Counts_Local_MV # noqa F401 +from .lee import Spatial_Pearson, Spatial_Pearson_Local # noqa F401 +from .losh import LOSH # noqa F401 +from .map_comparison import ( # noqa F401 + areal_entropy, completeness, + external_entropy, homogeneity, overlay_entropy, - areal_entropy, ) -from .moran import ( +from .moran import ( # noqa F401 Moran, Moran_BV, Moran_BV_matrix, Moran_Local, Moran_Local_BV, - Moran_Rate, Moran_Local_Rate, + Moran_Rate, ) -from .silhouettes import path_silhouette, boundary_silhouette -from . import shape -from .smaup import Smaup -from .topo import prominence, isolation -from .util import fdr - -from . import _version +from .silhouettes import boundary_silhouette, path_silhouette # noqa F401 +from .smaup import Smaup # noqa F401 +from .topo import isolation, prominence # noqa F401 +from .util import fdr # noqa F401 __version__ = _version.get_versions()["version"] diff --git a/esda/_version.py b/esda/_version.py index d94bdb6d..bc5c4cfe 100644 --- a/esda/_version.py +++ b/esda/_version.py @@ -1,4 +1,3 @@ - # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build @@ -58,18 +57,19 @@ class NotThisMethod(Exception): def register_vcs_handler(vcs, method): # decorator """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f + return decorate # pylint:disable=too-many-arguments,consider-using-with # noqa -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) process = None @@ -77,10 +77,13 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, try: dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + process = subprocess.Popen( + [command] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) break except EnvironmentError: e = sys.exc_info()[1] @@ -115,15 +118,21 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), 
parentdir_prefix)) + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @@ -182,7 +191,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -191,7 +200,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -199,24 +208,31 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] + r = ref[len(tag_prefix) :] # Filter out refs that exactly match prefix or that don't start # with a number once the prefix is stripped (mostly a concern # when prefix is '') - if not re.match(r'\d', r): + if not re.match(r"\d", r): continue if verbose: print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } @register_vcs_handler("git", "pieces_from_vcs") @@ -231,8 +247,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -240,10 +255,19 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) + describe_out, rc = runner( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + "%s*" % tag_prefix, + ], + cwd=root, + ) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -258,8 +282,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None - 
branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) # --abbrev-ref was added in git-1.6.3 if rc != 0 or branch_name is None: raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") @@ -299,17 +322,16 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] + git_describe = git_describe[: git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out return pieces # tag @@ -318,10 +340,12 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] + pieces["closest-tag"] = full_tag[len(tag_prefix) :] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) @@ -370,8 +394,7 @@ def render_pep440(pieces): rendered += ".dirty" else: # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -400,8 +423,7 @@ def render_pep440_branch(pieces): rendered = "0" if pieces["branch"] != "master": rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -544,11 +566,13 @@ def render_git_describe_long(pieces): def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } if not style or style == "default": style = "pep440" # the default @@ -572,9 +596,13 @@ def render(pieces, style): else: raise ValueError("unknown style '%s'" % style) - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } def get_versions(): @@ -588,8 +616,7 @@ def get_versions(): verbose = cfg.verbose try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass @@ -598,13 +625,16 @@ def get_versions(): # versionfile_source is the relative path from the top of the source # tree (where the .git directory might 
live) to this file. Invert # this to find the root from __file__. - for _ in cfg.versionfile_source.split('/'): + for _ in cfg.versionfile_source.split("/"): root = os.path.dirname(root) except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None, + } try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) @@ -618,6 +648,10 @@ def get_versions(): except NotThisMethod: pass - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } diff --git a/esda/adbscan.py b/esda/adbscan.py index a1a0a39f..dbb69e26 100644 --- a/esda/adbscan.py +++ b/esda/adbscan.py @@ -5,14 +5,16 @@ __author__ = "Dani Arribas-Bel " import warnings -import pandas +from collections import Counter + import numpy as np +import pandas from libpysal.cg.alpha_shapes import alpha_shape_auto from scipy.spatial import cKDTree -from collections import Counter +from sklearn.base import BaseEstimator as _BaseEstimator +from sklearn.base import ClusterMixin as _ClusterMixin from sklearn.cluster import DBSCAN from sklearn.neighbors import KNeighborsClassifier -from sklearn.base import BaseEstimator as _BaseEstimator, ClusterMixin as _ClusterMixin __all__ = ["ADBSCAN", "remap_lbls", "ensemble", "get_cluster_boundary"] @@ -32,52 +34,50 @@ class ADBSCAN(_ClusterMixin, _BaseEstimator): Parameters ---------- - eps : float - The maximum distance between two samples for them to be considered - as in the same neighborhood. - min_samples : int - The number of samples (or total weight) in a neighborhood - for a point to be considered as a core point. This includes the - point itself. - algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional - The algorithm to be used by the NearestNeighbors module - to compute pointwise distances and find nearest neighbors. - See NearestNeighbors module documentation for details. - n_jobs : int - [Optional. Default=1] The number of parallel jobs to run. If - -1, then the number of jobs is set to the number of CPU - cores. - pct_exact : float - [Optional. Default=0.1] Proportion of the entire dataset - used to calculate DBSCAN in each draw - reps : int - [Optional. Default=100] Number of random samples to draw in order to - build final solution - keep_solus : Boolean - [Optional. Default=False] If True, the `solus` and - `solus_relabelled` objects are kept, else it is deleted to - save memory - pct_thr : float - [Optional. Default=0.9] Minimum proportion of replications that a non-noise - label need to be assigned to an observation for that observation to be labelled - as such + eps : float + The maximum distance between two samples for them to be considered + as in the same neighborhood. + min_samples : int + The number of samples (or total weight) in a neighborhood + for a point to be considered as a core point. This includes the + point itself. + algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional + The algorithm to be used by the NearestNeighbors module + to compute pointwise distances and find nearest neighbors. + See NearestNeighbors module documentation for details. + n_jobs : int + [Optional. 
Default=1] The number of parallel jobs to run. If -1, then + the number of jobs is set to the number of CPU cores. + pct_exact : float + [Optional. Default=0.1] Proportion of the entire dataset + used to calculate DBSCAN in each draw + reps : int + [Optional. Default=100] Number of random samples to draw in order to + build final solution + keep_solus : bool + [Optional. Default=False] If True, the `solus` and `solus_relabelled` + objects are kept, else it is deleted to save memory + pct_thr : float + [Optional. Default=0.9] Minimum proportion of replications that + a non-noise label need to be assigned to an observation for that + observation to be labelled as such Attributes ---------- - labels_ : array - [Only available after `fit`] Cluster labels for each point in the - dataset given to fit(). - Noisy (if the proportion of the most common label is < pct_thr) - samples are given the label -1. - votes : DataFrame - [Only available after `fit`] Table indexed on `X.index` with - `labels_` under the `lbls` column, and the frequency across draws of - that label under `pct` - solus : DataFrame, shape = [n, reps] - [Only available after `fit`] Each solution of labels for every draw - solus_relabelled: DataFrame, shape = [n, reps] - [Only available after `fit`] Each solution of labels for - every draw, relabelled to be consistent across solutions + labels_ : array + [Only available after `fit`] Cluster labels for each point in the + dataset given to fit(). + Noisy (if the proportion of the most common label is < pct_thr) + samples are given the label -1. + votes : DataFrame + [Only available after `fit`] Table indexed on `X.index` with + `labels_` under the `lbls` column, and the frequency across draws of + that label under `pct` + solus : DataFrame, shape = [n, reps] + [Only available after `fit`] Each solution of labels for every draw + solus_relabelled : DataFrame, shape = [n, reps] + [Only available after `fit`] Each solution of labels for + every draw, relabelled to be consistent across solutions Examples -------- @@ -185,14 +185,11 @@ def fit(self, X, y=None, sample_weight=None, xy=["X", "Y"]): ) # Multi-core implementation of parallel draws if (self.n_jobs == -1) or (self.n_jobs > 1): - pool = _setup_pool(self.n_jobs) # Set different parallel seeds!!! - warn_msg = ( - "Multi-core implementation only works on " - "relabelling solutions. Execution of draws " - "is still sequential." + warnings.warn( + "Multi-core implementation only works on relabelling solutions. " + "Execution of draws is still sequential." 
) - warnings.warn(warn_msg) for i in range(self.reps): pars = ( n, @@ -416,8 +413,10 @@ def ensemble(solus_relabelled): 4 7 0.67 """ - f = lambda a: Counter(a).most_common(1)[0] - counts = np.array(list(map(f, solus_relabelled.values))) + + counts = np.array( + list(map(lambda a: Counter(a).most_common(1)[0], solus_relabelled.values)) + ) winner = counts[:, 0] votes = counts[:, 1].astype(int) / solus_relabelled.shape[1] pred = pandas.DataFrame( @@ -501,7 +500,8 @@ def get_cluster_boundary(labels, xys, xy=["X", "Y"], n_jobs=1, crs=None, step=1) >>> polys = get_cluster_boundary(labels, db) >>> polys[0].wkt 'POLYGON ((0.7217553174317995 0.8192869956700687, 0.7605307121989587 0.9086488808086682, 0.9177741225129434 0.8568503024577332, 0.8126209616521135 0.6262871483113925, 0.6125260668293881 0.5475861559192435, 0.5425443680112613 0.7546476915298572, 0.7217553174317995 0.8192869956700687))' - """ + """ # noqa E501 + try: from geopandas import GeoSeries except ModuleNotFoundError: diff --git a/esda/crand.py b/esda/crand.py index eeb7e615..d4413f4c 100644 --- a/esda/crand.py +++ b/esda/crand.py @@ -3,11 +3,12 @@ """ import os -import numpy as np import warnings +import numpy as np + try: - from numba import njit, jit, prange, boolean + from numba import boolean, jit, njit, prange except (ImportError, ModuleNotFoundError): def jit(*dec_args, **dec_kwargs): @@ -25,7 +26,6 @@ def intercepted_function(f, *f_args, **f_kwargs): prange = range boolean = bool -import tempfile __all__ = ["crand"] @@ -119,8 +119,8 @@ def crand( p_sim : ndarray (N,) array with pseudo p-values from conditional permutation rlocals : ndarray - If keep=True, (N, permutations) array with simulated values of stat_func under the - null of spatial randomness; else, empty (1, 1) array + If keep=True, (N, permutations) array with simulated values + of stat_func under the null of spatial randomness; else, empty (1, 1) array """ adj_matrix = w.sparse @@ -153,28 +153,28 @@ def crand( # we need to be careful to shuffle only *other* sites, not # the self-site. This means we need to - ### extract the self-weight, if any + # extract the self-weight, if any self_weights = adj_matrix.diagonal() - ### force the self-site weight to zero + # force the self-site weight to zero with warnings.catch_warnings(): # massive changes to sparsity incur a cost, but it's not # large for simply changing the diag warnings.simplefilter("ignore") adj_matrix.setdiag(0) adj_matrix.eliminate_zeros() - ### extract the weights from a now no-self-weighted adj_matrix + # extract the weights from a now no-self-weighted adj_matrix other_weights = adj_matrix.data - ### use the non-self weight as the cardinality, since - ### this is the set we have to randomize. - ### if there is a self-neighbor, we need to *not* shuffle the - ### self neighbor, since conditional randomization conditions on site i. + # use the non-self weight as the cardinality, since + # this is the set we have to randomize. + # if there is a self-neighbor, we need to *not* shuffle the + # self neighbor, since conditional randomization conditions on site i. 
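# ---------------------------------------------------------------------------
# Editorial aside (illustrative only, not part of the patch): a plain-Python
# sketch of the conditional permutation scheme the comments above describe.
# Site i is held fixed, its neighbors' values are drawn from the remaining
# n - 1 observations, and the pseudo p-value is folded to the smaller tail.
# The helper name and the Moran-style cross-product are assumptions made for
# the sketch, not the library's actual ``stat_func``.
import numpy as np


def _conditional_pseudo_p(i, z, neighbor_weights, observed, permutations=999, seed=0):
    rng = np.random.default_rng(seed)
    others = np.delete(np.arange(len(z)), i)  # never shuffle the self-site
    larger = 0
    for _ in range(permutations):
        draw = rng.choice(z[others], size=len(neighbor_weights), replace=False)
        simulated = z[i] * (neighbor_weights @ draw)
        larger += simulated >= observed
    larger = min(larger, permutations - larger)  # fold to the smaller tail
    return (larger + 1) / (permutations + 1)
# ---------------------------------------------------------------------------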
cardinalities = np.array((adj_matrix != 0).sum(1)).flatten() max_card = cardinalities.max() permuted_ids = vec_permutations(max_card, n, permutations, seed) if n_jobs != 1: try: - import joblib + import joblib # noqa F401 except (ModuleNotFoundError, ImportError): warnings.warn( f"Parallel processing is requested (n_jobs={n_jobs})," @@ -288,9 +288,9 @@ def compute_chunk( scaling : float Scaling value to apply to every local statistic island_weight: - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. If 0, then - the lag is always zero for islands. + value to use as a weight for the "fake" neighbor for every island. + If numpy.nan, will propagate to the final local statistic depending + on the `stat_func`. If 0, then the lag is always zero for islands. Returns ------- @@ -315,27 +315,19 @@ def compute_chunk( for i in range(chunk_n): cardinality = cardinalities[i] if cardinality == 0: # deal with islands - effective_cardinality = 1 weights_i = np.zeros(2, dtype=other_weights.dtype) weights_i[1] = island_weight else: - effective_cardinality = cardinality # we need to fix the self-weight to the first position weights_i = np.zeros(cardinality + 1, dtype=other_weights.dtype) weights_i[0] = self_weights[i] - ### this chomps the next `cardinality` weights off of `weights` - weights_i[1:] = other_weights[wloc : (wloc + cardinality)] + # this chomps the next `cardinality` weights off of `weights` + weights_i[1:] = other_weights[wloc : (wloc + cardinality)] # noqa E203 wloc += cardinality - z_chunk_i = z_chunk[i] mask[chunk_start + i] = False - z_no_i = z[ - mask, - ] rstats = stat_func(chunk_start + i, z, permuted_ids, weights_i, scaling) if keep: - rlocals[ - i, - ] = rstats + rlocals[i] = rstats larger[i] = np.sum(rstats >= observed[i]) return larger, rlocals @@ -370,7 +362,7 @@ def build_weights_offsets(cardinalities: np.ndarray, n_chunks: int): chunk_size = np.int64(n / n_chunks) + 1 start = 0 for i in range(n_chunks): - advance = cardinalities[start : start + chunk_size].sum() + advance = cardinalities[start : start + chunk_size].sum() # noqa E203 boundary_points[i + 1] = boundary_points[i] + advance start += chunk_size return boundary_points @@ -432,11 +424,13 @@ def chunk_generator( chunk_size = starts[1] - starts[0] for i in range(n_jobs): start = starts[i] - z_chunk = z[start : (start + chunk_size)] - self_weights_chunk = self_weights[start : (start + chunk_size)] - observed_chunk = observed[start : (start + chunk_size)] - cardinalities_chunk = cardinalities[start : (start + chunk_size)] - w_chunk = other_weights[w_boundary_points[i] : w_boundary_points[i + 1]] + z_chunk = z[start : (start + chunk_size)] # noqa E203 + self_weights_chunk = self_weights[start : (start + chunk_size)] # noqa E203 + observed_chunk = observed[start : (start + chunk_size)] # noqa E203 + cardinalities_chunk = cardinalities[start : (start + chunk_size)] # noqa E203 + w_chunk = other_weights[ + w_boundary_points[i] : w_boundary_points[i + 1] # noqa E203 + ] yield ( start, z_chunk, @@ -507,9 +501,9 @@ def parallel_crand( scaling : float Scaling value to apply to every local statistic island_weight: - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. If 0, then - the lag is always zero for islands. + value to use as a weight for the "fake" neighbor for every island. 
+ If numpy.nan, will propagate to the final local statistic depending + on the `stat_func`. If 0, then the lag is always zero for islands. Returns ------- larger : ndarray diff --git a/esda/gamma.py b/esda/gamma.py index 7ca8342c..f6733b87 100644 --- a/esda/gamma.py +++ b/esda/gamma.py @@ -5,16 +5,14 @@ """ __author__ = "Luc Anselin Serge Rey " +import warnings + import numpy as np from libpysal.weights.spatial_lag import lag_spatial + +from .crand import _prepare_univariate +from .crand import njit as _njit from .tabular import _univariate_handler -import warnings -from .crand import ( - crand as _crand_plus, - njit as _njit, - _prepare_univariate, - _prepare_bivariate, -) __all__ = ["Gamma"] @@ -215,7 +213,7 @@ def __calc(self, z, op): g = (z * zl).sum() elif op == "s": # squared difference zs = np.zeros(z.shape) - z2 = z ** 2 + z2 = z**2 for i, i0 in enumerate(self.w.id_order): neighbors = self.w.neighbor_offsets[i0] wijs = self.w.weights[i0] diff --git a/esda/geary.py b/esda/geary.py index a8dda077..e1b07876 100644 --- a/esda/geary.py +++ b/esda/geary.py @@ -6,6 +6,7 @@ import numpy as np import scipy.stats as stats from libpysal import weights + from .tabular import _univariate_handler __all__ = ["Geary"] @@ -144,13 +145,13 @@ def __init__(self, y, w, transformation="r", permutations=999): self.p_sim = (larger + 1.0) / (permutations + 1.0) self.EC_sim = sum(sim) / permutations self.seC_sim = np.array(sim).std() - self.VC_sim = self.seC_sim ** 2 + self.VC_sim = self.seC_sim**2 self.z_sim = (self.C - self.EC_sim) / self.seC_sim self.p_z_sim = 1 - stats.norm.cdf(np.abs(self.z_sim)) @property def _statistic(self): - """ a standardized accessor for esda statistics""" + """a standardized accessor for esda statistics""" return self.C def __moments(self): @@ -162,8 +163,8 @@ def __moments(self): s2 = w.s2 s02 = s0 * s0 yd = y - y.mean() - yd4 = yd ** 4 - yd2 = yd ** 2 + yd4 = yd**4 + yd2 = yd**2 n2 = n * n k = (yd4.sum() / n) / ((yd2.sum() / n) ** 2) A = (n - 1) * s1 * (n2 - 3 * n + 3 - (n - 1) * k) @@ -191,27 +192,27 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - cols : string or list of string - name or list of names of columns to use to compute the statistic - w : pysal weights object - a weights object aligned with the dataframe. If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, with default configurations, - the derived columns will be named like 'column_geary' and 'column_p_sim' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the Geary statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - Geary statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the Geary statistic. + df : pandas.DataFrame + a pandas dataframe with a geometry column + cols : string or list of string + name or list of names of columns to use to compute the statistic + w : pysal weights object + a weights object aligned with the dataframe. If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe inplace or to + return a series contaning the results of the computation. 
If + operating inplace, with default configurations, + the derived columns will be named like 'column_geary' and 'column_p_sim' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the Geary statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the + Geary statistic + **stat_kws : dict + options to pass to the underlying statistic. For this, see the + documentation for the Geary statistic. Returns -------- diff --git a/esda/geary_local.py b/esda/geary_local.py index 64e03024..728f316d 100644 --- a/esda/geary_local.py +++ b/esda/geary_local.py @@ -1,11 +1,10 @@ import numpy as np import pandas as pd -import warnings -from scipy import sparse -from scipy import stats from sklearn.base import BaseEstimator -import libpysal as lp -from esda.crand import crand as _crand_plus, njit as _njit, _prepare_univariate + +from esda.crand import _prepare_univariate +from esda.crand import crand as _crand_plus +from esda.crand import njit as _njit class Geary_Local(BaseEstimator): @@ -63,10 +62,10 @@ def __init__( of the function, since numba does not correctly interpret external seeds nor numpy.random.RandomState instances. - island_weight: - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. If 0, then - the lag is always zero for islands. + island_weight : + value to use as a weight for the "fake" neighbor for every island. + If numpy.nan, will propagate to the final local statistic depending + on the `stat_func`. If 0, then the lag is always zero for islands. Attributes ---------- @@ -126,7 +125,6 @@ def fit(self, x): sig = self.sig keep_simulations = self.keep_simulations n_jobs = self.n_jobs - seed = self.seed self.localG = self._statistic(x, w) @@ -148,11 +146,13 @@ def fit(self, x): # Create empty vector to fill self.labs = np.empty(len(x)) * np.nan # Outliers - self.labs[(self.localG < Eij_mean) & (x > x_mean) & (self.p_sim <= sig)] = 1 + locg_lt_eij = self.localG < Eij_mean + p_leq_sig = self.p_sim <= sig + self.labs[locg_lt_eij & (x > x_mean) & p_leq_sig] = 1 # Clusters - self.labs[(self.localG < Eij_mean) & (x < x_mean) & (self.p_sim <= sig)] = 2 + self.labs[locg_lt_eij & (x < x_mean) & p_leq_sig] = 2 # Other - self.labs[(self.localG > Eij_mean) & (self.p_sim <= sig)] = 3 + self.labs[(self.localG > Eij_mean) & p_leq_sig] = 3 # Non-significant self.labs[self.p_sim > sig] = 4 @@ -191,7 +191,6 @@ def _statistic(x, w): @_njit(fastmath=True) def _local_geary(i, z, permuted_ids, weights_i, scaling): - self_weight = weights_i[0] other_weights = weights_i[1:] zi, zrand = _prepare_univariate(i, z, permuted_ids, other_weights) return (zi - zrand) ** 2 @ other_weights diff --git a/esda/geary_local_mv.py b/esda/geary_local_mv.py index 16f4ceaf..f9dbccc6 100644 --- a/esda/geary_local_mv.py +++ b/esda/geary_local_mv.py @@ -157,15 +157,13 @@ def _crand(self, zvariables): np.random.shuffle(idsi) vars_rand = [] for j in range(nvars): - vars_rand.append(zvariables[j][idsi[rids[:, 0 : wc[i]]]]) + vars_rand.append(zvariables[j][idsi[rids[:, 0 : wc[i]]]]) # noqa E203 # vars rand as tmp # Calculate diff diff = [] for z in range(nvars): - diff.append( - (np.array((zvariables[z][i] - vars_rand[z]) ** 2 * w[i])).sum(1) - / nvars - ) + _diff = (np.array((zvariables[z][i] - vars_rand[z]) ** 2 * w[i])) + diff.append(_diff.sum(1) / nvars) # add up differences temp = np.array([sum(x) for x in zip(*diff)]) # 
Assign to object to be returned diff --git a/esda/getisord.py b/esda/getisord.py index c87231d7..7390f976 100644 --- a/esda/getisord.py +++ b/esda/getisord.py @@ -5,11 +5,15 @@ __all__ = ["G", "G_Local"] import warnings + from libpysal.common import np, stats from libpysal.weights.spatial_lag import lag_spatial as slag from libpysal.weights.util import fill_diagonal + +from .crand import _prepare_univariate +from .crand import crand as _crand_plus +from .crand import njit as _njit from .tabular import _univariate_handler -from .crand import njit as _njit, crand as _crand_plus, _prepare_univariate PERMUTATIONS = 999 @@ -29,40 +33,40 @@ class G(object): Attributes ---------- - y : array - original variable - w : W - DistanceBand W spatial weights based on distance band - permutation : int - the number of permutations - G : float - the value of statistic - EG : float - the expected value of statistic - VG : float - the variance of G under normality assumption - z_norm : float - standard normal test statistic - p_norm : float - p-value under normality assumption (one-sided) - sim : array - (if permutations > 0) - vector of G values for permutated samples - p_sim : float - p-value based on permutations (one-sided) - null: spatial randomness - alternative: the observed G is extreme it is either extremely high or extremely low - EG_sim : float - average value of G from permutations - VG_sim : float - variance of G from permutations - seG_sim : float - standard deviation of G under permutations. - z_sim : float - standardized G based on permutations - p_z_sim : float - p-value based on standard normal approximation from - permutations (one-sided) + y : array + original variable + w : W + DistanceBand W spatial weights based on distance band + permutation : int + the number of permutations + G : float + the value of statistic + EG : float + the expected value of statistic + VG : float + the variance of G under normality assumption + z_norm : float + standard normal test statistic + p_norm : float + p-value under normality assumption (one-sided) + sim : array + (if permutations > 0) + vector of G values for permutated samples + p_sim : float + p-value based on permutations (one-sided) + null: spatial randomness + alternative: the observed G is extreme it is either + extremely high or extremely low + EG_sim : float + average value of G from permutations + VG_sim : float + variance of G from permutations + seG_sim : float + standard deviation of G under permutations. 
+ z_sim : float + standardized G based on permutations + p_z_sim : float + p-value based on standard normal approximation from permutations (one-sided) Notes ----- @@ -134,7 +138,7 @@ def __init__(self, y, w, permutations=PERMUTATIONS): self.p_sim = (larger + 1.0) / (permutations + 1.0) self.EG_sim = sum(sim) / permutations self.seG_sim = sim.std() - self.VG_sim = self.seG_sim ** 2 + self.VG_sim = self.seG_sim**2 self.z_sim = (self.G - self.EG_sim) / self.seG_sim self.p_z_sim = 1.0 - stats.norm.cdf(np.abs(self.z_sim)) @@ -166,7 +170,7 @@ def __moments(self): EG2NUM = EG2 EG2DEN = ((sum(y) ** 2 - sum(y2)) ** 2) * n * (n - 1) * (n - 2) * (n - 3) self.EG2 = EG2NUM / EG2DEN - self.VG = self.EG2 - self.EG ** 2 + self.VG = self.EG2 - self.EG**2 def __calc(self, y): yl = slag(self.w, y) @@ -175,7 +179,7 @@ def __calc(self, y): @property def _statistic(self): - """ Standardized accessor for esda statistics""" + """Standardized accessor for esda statistics""" return self.G @classmethod @@ -187,31 +191,31 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - cols : string or list of string - name or list of names of columns to use to compute the statistic - w : pysal weights object - a weights object aligned with the dataframe. If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, the derived columns will be named 'column_g' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the G statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - G statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the G statistic. + df : pandas.DataFrame + a pandas dataframe with a geometry column + cols : string or list of string + name or list of names of columns to use to compute the statistic + w : pysal weights object + a weights object aligned with the dataframe. If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe inplace or to + return a series contaning the results of the computation. If + operating inplace, the derived columns will be named 'column_g' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the G statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the G statistic + **stat_kws : dict + options to pass to the underlying statistic. For this, see the + documentation for the G statistic. Returns -------- - If inplace, None, and operation is conducted on dataframe in memory. Otherwise, - returns a copy of the dataframe with the relevant columns attached. + If inplace, None, and operation is conducted on dataframe + in memory. Otherwise, returns a copy of the dataframe with + the relevant columns attached. """ return _univariate_handler( @@ -250,9 +254,9 @@ class G_Local(object): provided in binary form, and standardization/self-weighting will be handled by the function itself. island_weight: - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. 
If 0, then - the lag is always zero for islands. + value to use as a weight for the "fake" neighbor for every island. + If numpy.nan, will propagate to the final local statistic depending + on the `stat_func`. If 0, then the lag is always zero for islands. Attributes ---------- @@ -279,9 +283,10 @@ class G_Local(object): of arrays of floats (if permutations>0), vector of I values for permutated samples p_sim : array - of floats, p-value based on permutations (one-sided) - null - spatial randomness - alternative - the observed G is extreme it is either extremely high or extremely low + of floats, p-value based on permutations (one-sided) + null - spatial randomness + alternative - the observed G is extreme + (it is either extremely high or extremely low) EG_sim : array of floats, average value of G from permutations VG_sim : array @@ -440,7 +445,6 @@ def __crand(self, keep_simulations): k = self.w.max_neighbors + 1 rids = np.array([np.random.permutation(rid)[0:k] for i in prange]) ids = np.arange(self.w.n) - ido = self.w.id_order wc = self.__getCardinalities() if self.w_transform == "r": den = np.array(wc) + self.star @@ -486,19 +490,18 @@ def calc(self): empirical_mean = (y.sum() - y * remove_self) / N # variance looks complex, yes, but it obtains from E[x^2] - E[x]^2. # So, break it down to allow subtraction of the self-neighbor. - mean_of_squares = ((y ** 2).sum() - (y ** 2) * remove_self) / N - empirical_variance = mean_of_squares - empirical_mean ** 2 + mean_of_squares = ((y**2).sum() - (y**2) * remove_self) / N + empirical_variance = mean_of_squares - empirical_mean**2 # Since we have corrected the diagonal, this should work cardinality = np.asarray(W.sum(axis=1)).squeeze() expected_value = cardinality / N - expected_variance = ( - cardinality - * (N - cardinality) - / (N - 1) - * (1 / N ** 2) - * (empirical_variance / (empirical_mean ** 2)) - ) + + expected_variance = cardinality * (N - cardinality) + expected_variance /= (N - 1) + expected_variance *= (1 / N**2) + expected_variance *= (empirical_variance / (empirical_mean**2)) + z_scores = (statistic - expected_value) / np.sqrt(expected_variance) self.Gs = statistic @@ -520,26 +523,26 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - cols : string or list of string - name or list of names of columns to use to compute the statistic - w : pysal weights object - a weights object aligned with the dataframe. If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, the derived columns will be named 'column_g_local' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the G_Local statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - G_Local statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the G_Local statistic. + df : pandas.DataFrame + a pandas dataframe with a geometry column + cols : string or list of string + name or list of names of columns to use to compute the statistic + w : pysal weights object + a weights object aligned with the dataframe. 
If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe inplace or to + return a series contaning the results of the computation. If + operating inplace, the derived columns will be named 'column_g_local' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the G_Local statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the + G_Local statistic + **stat_kws : dict + options to pass to the underlying statistic. For this, see the + documentation for the G_Local statistic. Returns ------- @@ -608,7 +611,7 @@ def _infer_star_and_structure_w(weights, star, transform): else: # star was something else, so try to fill the weights with it try: weights = fill_diagonal(weights, star) - except: + except TypeError: raise TypeError( f"Type of star ({type(star)}) not understood." f" Must be an integer, boolean, float, or numpy.ndarray." @@ -640,9 +643,12 @@ def _g_local_star_crand(i, z, permuted_ids, weights_i, scaling): if __name__ == "__main__": - import geopandas, numpy, esda, importlib - import matplotlib.pyplot as plt - from libpysal import weights, examples + + import geopandas + import numpy + from libpysal import examples, weights + + import esda df = geopandas.read_file(examples.get_path("NAT.shp")) diff --git a/esda/join_counts.py b/esda/join_counts.py index 7d46c4d4..5d5d888e 100644 --- a/esda/join_counts.py +++ b/esda/join_counts.py @@ -4,18 +4,12 @@ """ __author__ = "Sergio J. Rey , Luc Anselin " -from libpysal.weights.spatial_lag import lag_spatial -from .tabular import _univariate_handler -from scipy.stats import chi2_contingency -from scipy.stats import chi2 import numpy as np import pandas as pd -from .crand import ( - crand as _crand_plus, - njit as _njit, - _prepare_univariate, - _prepare_bivariate, -) +from scipy.stats import chi2, chi2_contingency + +from .crand import njit as _njit +from .tabular import _univariate_handler __all__ = ["Join_Counts"] @@ -258,32 +252,33 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - cols : string or list of string - name or list of names of columns to use to compute the statistic - w : pysal weights object - a weights object aligned with the dataframe. If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, the derived columns will be named - 'column_join_count' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the Join_Count statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - Join_Count statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the Join_Count statistic. + df : pandas.DataFrame + a pandas dataframe with a geometry column + cols : string or list of string + name or list of names of columns to use to compute the statistic + w : pysal weights object + a weights object aligned with the dataframe. If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe inplace or to + return a series contaning the results of the computation. 
If + operating inplace, the derived columns will be named + 'column_join_count' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the Join_Count statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the + Join_Count statistic + **stat_k : dict + options to pass to the underlying statistic. For this, see the + documentation for the Join_Count statistic. Returns -------- - If inplace, None, and operation is conducted on dataframe in memory. Otherwise, - returns a copy of the dataframe with the relevant columns attached. + If inplace, None, and operation is conducted on + dataframe in memory. Otherwise, returns a copy of the + dataframe with the relevant columns attached. """ if outvals is None: diff --git a/esda/join_counts_local.py b/esda/join_counts_local.py index 379be4f1..88321c1f 100644 --- a/esda/join_counts_local.py +++ b/esda/join_counts_local.py @@ -1,9 +1,11 @@ import numpy as np import pandas as pd -from sklearn.base import BaseEstimator from libpysal import weights -from esda.crand import crand as _crand_plus, njit as _njit, _prepare_univariate +from sklearn.base import BaseEstimator +from esda.crand import _prepare_univariate +from esda.crand import crand as _crand_plus +from esda.crand import njit as _njit PERMUTATIONS = 999 @@ -26,30 +28,29 @@ def __init__( Parameters ---------- - connectivity : scipy.sparse matrix object - the connectivity structure describing - the relationships between observed units. - Need not be row-standardized. - permutations : int - number of random permutations for calculation of pseudo - p_values - n_jobs : int - Number of cores to be used in the conditional randomisation. If -1, - all available cores are used. - keep_simulations : Boolean - (default=True) - If True, the entire matrix of replications under the null - is stored in memory and accessible; otherwise, replications - are not saved - seed : None/int - Seed to ensure reproducibility of conditional randomizations. - Must be set here, and not outside of the function, since numba - does not correctly interpret external seeds - nor numpy.random.RandomState instances. + connectivity : scipy.sparse matrix object + the connectivity structure describing + the relationships between observed units. + Need not be row-standardized. + permutations : int + number of random permutations for calculation of pseudo + p_values + n_jobs : int + Number of cores to be used in the conditional randomisation. If -1, + all available cores are used. + keep_simulations : bool (default True) + If True, the entire matrix of replications under the null + is stored in memory and accessible; otherwise, replications + are not saved + seed : None/int + Seed to ensure reproducibility of conditional randomizations. + Must be set here, and not outside of the function, since numba + does not correctly interpret external seeds + nor numpy.random.RandomState instances. island_weight: - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. If 0, then - the lag is always zero for islands. + value to use as a weight for the "fake" neighbor for every island. + If numpy.nan, will propagate to the final local statistic depending + on the `stat_func`. If 0, then the lag is always zero for islands. 
Attributes ---------- @@ -116,7 +117,7 @@ def fit(self, y, n_jobs=1, permutations=999): keep_simulations = self.keep_simulations n_jobs = self.n_jobs - seed = self.seed + # seed = self.seed self.y = y self.n = len(y) @@ -167,7 +168,7 @@ def _statistic(y, w): @_njit(fastmath=True) def _ljc_uni(i, z, permuted_ids, weights_i, scaling): - self_weight = weights_i[0] + # self_weight = weights_i[0] other_weights = weights_i[1:] zi, zrand = _prepare_univariate(i, z, permuted_ids, other_weights) return zi * (zrand @ other_weights) diff --git a/esda/join_counts_local_bv.py b/esda/join_counts_local_bv.py index f4ccb113..2da6c9a2 100644 --- a/esda/join_counts_local_bv.py +++ b/esda/join_counts_local_bv.py @@ -1,16 +1,11 @@ import numpy as np import pandas as pd -import warnings -from scipy import sparse -from sklearn.base import BaseEstimator from libpysal import weights -from esda.crand import ( - crand as _crand_plus, - njit as _njit, - _prepare_univariate, - _prepare_bivariate, -) +from sklearn.base import BaseEstimator +from esda.crand import _prepare_bivariate, _prepare_univariate +from esda.crand import crand as _crand_plus +from esda.crand import njit as _njit PERMUTATIONS = 999 @@ -33,30 +28,28 @@ def __init__( Parameters ---------- - connectivity : scipy.sparse matrix object - the connectivity structure describing - the relationships between observed units. - Need not be row-standardized. - permutations : int - number of random permutations for calculation of pseudo - p_values - n_jobs : int - Number of cores to be used in the conditional randomisation. If -1, - all available cores are used. - keep_simulations : Boolean - (default=True) - If True, the entire matrix of replications under the null - is stored in memory and accessible; otherwise, replications - are not saved - seed : None/int - Seed to ensure reproducibility of conditional randomizations. - Must be set here, and not outside of the function, since numba - does not correctly interpret external seeds - nor numpy.random.RandomState instances. + connectivity : scipy.sparse matrix object + the connectivity structure describing + the relationships between observed units. + Need not be row-standardized. + permutations : int + number of random permutations for calculation of pseudo p_values + n_jobs : int + Number of cores to be used in the conditional randomisation. + If -1, all available cores are used. + keep_simulations : bool (default True) + If True, the entire matrix of replications under the null + is stored in memory and accessible; otherwise, replications + are not saved + seed : None/int + Seed to ensure reproducibility of conditional randomizations. + Must be set here, and not outside of the function, since numba + does not correctly interpret external seeds + nor numpy.random.RandomState instances. island_weight: - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. If 0, then - the lag is always zero for islands. + value to use as a weight for the "fake" neighbor for every island. + If numpy.nan, will propagate to the final local statistic depending + on the `stat_func`. If 0, then the lag is always zero for islands. 
""" self.connectivity = connectivity @@ -99,9 +92,14 @@ def fit(self, x, z, case="CLC", n_jobs=1, permutations=999): Commpop data replicating GeoDa tutorial (Case 1) >>> import libpysal >>> import geopandas as gpd - >>> commpop = gpd.read_file("https://github.com/jeffcsauer/GSOC2020/raw/master/validation/data/commpop.gpkg") + >>> commpop = gpd.read_file( + ... "https://github.com/jeffcsauer/GSOC2020/raw/master/" + ... "validation/data/commpop.gpkg" + ... ) >>> w = libpysal.weights.Queen.from_dataframe(commpop) - >>> LJC_BV_Case1 = Local_Join_Counts_BV(connectivity=w).fit(commpop['popneg'], commpop['popplus'], case='BJC') + >>> LJC_BV_Case1 = Local_Join_Counts_BV( + ... connectivity=w + ... ).fit(commpop['popneg'], commpop['popplus'], case='BJC') >>> LJC_BV_Case1.LJC >>> LJC_BV_Case1.p_sim @@ -115,12 +113,13 @@ def fit(self, x, z, case="CLC", n_jobs=1, permutations=999): >>> guerry_ds.loc[(guerry_ds['Infants'] > 23574), 'infq5'] = 1 >>> guerry_ds.loc[(guerry_ds['Donatns'] > 10973), 'donq5'] = 1 >>> w = libpysal.weights.Queen.from_dataframe(guerry_ds) - >>> LJC_BV_Case2 = Local_Join_Counts_BV(connectivity=w).fit(guerry_ds['infq5'], guerry_ds['donq5'], case='CLC') + >>> LJC_BV_Case2 = Local_Join_Counts_BV( + ... connectivity=w + ... ).fit(guerry_ds['infq5'], guerry_ds['donq5'], case='CLC') >>> LJC_BV_Case2.LJC >>> LJC_BV_Case2.p_sim """ - # Need to ensure that the np.array() are of - # dtype='float' for numba + # Need to ensure that the np.array() are of dtype='float' for numba x = np.array(x, dtype="float") z = np.array(z, dtype="float") @@ -135,9 +134,7 @@ def fit(self, x, z, case="CLC", n_jobs=1, permutations=999): self.w = w self.case = case - keep_simulations = self.keep_simulations n_jobs = self.n_jobs - seed = self.seed self.LJC = self._statistic(x, z, w, case=case) @@ -182,8 +179,7 @@ def _statistic(x, z, w, case): # different from the esda.Join_Counts() function. 
adj_list = w.to_adjlist(remove_symmetric=False) - # First, set up a series that maps the values - # to the weights table + # First, set up a series that maps the values to the weights table zseries_x = pd.Series(x, index=w.id_order) zseries_z = pd.Series(z, index=w.id_order) @@ -233,7 +229,6 @@ def _statistic(x, z, w, case): def _ljc_bv_case1(i, z, permuted_ids, weights_i, scaling): zx = z[:, 0] zy = z[:, 1] - self_weight = weights_i[0] other_weights = weights_i[1:] zyi, zyrand = _prepare_univariate(i, zy, permuted_ids, other_weights) return zx[i] * (zyrand @ other_weights) @@ -241,9 +236,7 @@ def _ljc_bv_case1(i, z, permuted_ids, weights_i, scaling): @_njit(fastmath=True) def _ljc_bv_case2(i, z, permuted_ids, weights_i, scaling): - zx = z[:, 0] zy = z[:, 1] - self_weight = weights_i[0] other_weights = weights_i[1:] zxi, zxrand, zyi, zyrand = _prepare_bivariate(i, z, permuted_ids, other_weights) zf = zxrand * zyrand diff --git a/esda/join_counts_local_mv.py b/esda/join_counts_local_mv.py index 49e8fec6..0f52b718 100644 --- a/esda/join_counts_local_mv.py +++ b/esda/join_counts_local_mv.py @@ -1,18 +1,17 @@ import numpy as np import pandas as pd -from scipy import sparse -from sklearn.base import BaseEstimator from libpysal import weights -from esda.crand import crand as _crand_plus, njit as _njit, _prepare_univariate +from sklearn.base import BaseEstimator +from esda.crand import _prepare_univariate +from esda.crand import crand as _crand_plus +from esda.crand import njit as _njit PERMUTATIONS = 999 class Join_Counts_Local_MV(BaseEstimator): - """Multivariate Local Join Count Statistic""" - def __init__( self, connectivity=None, @@ -27,30 +26,27 @@ def __init__( Parameters ---------- - connectivity : scipy.sparse matrix object - the connectivity structure describing - the relationships between observed units. - Need not be row-standardized. - permutations : int - number of random permutations for calculation of pseudo - p_values - n_jobs : int - Number of cores to be used in the conditional randomisation. If -1, - all available cores are used. - keep_simulations : Boolean - (default=True) - If True, the entire matrix of replications under the null - is stored in memory and accessible; otherwise, replications - are not saved - seed : None/int - Seed to ensure reproducibility of conditional randomizations. - Must be set here, and not outside of the function, since numba - does not correctly interpret external seeds - nor numpy.random.RandomState instances. - island_weight: - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. If 0, then - the lag is always zero for islands. + connectivity : scipy.sparse matrix object + the connectivity structure describing + the relationships between observed units. + Need not be row-standardized. + permutations : int + number of random permutations for calculation of pseudo p_values + n_jobs : int + Number of cores to be used in the conditional randomisation. If -1, + all available cores are used. + keep_simulations : bool (default True) + If ``True``, the entire matrix of replications under the null is stored + in memory and accessible; otherwise, replications are not saved + seed : int (default None) + Seed to ensure reproducibility of conditional randomizations. + Must be set here, and not outside of the function, since numba + does not correctly interpret external seeds + nor numpy.random.RandomState instances. 
+ island_weight : int or float (default 0) + value to use as a weight for the "fake" neighbor for every island. + If ``numpy.nan``, will propagate to the final local statistic depending + on the ``stat_func``. If ``0``, then the lag is always zero for islands. """ @@ -99,7 +95,9 @@ def fit(self, variables, n_jobs=1, permutations=999): >>> guerry_ds.loc[(guerry_ds['Donatns'] > 10973), 'donq5'] = 1 >>> guerry_ds.loc[(guerry_ds['Suicids'] > 55564), 'suic5'] = 1 >>> w = libpysal.weights.Queen.from_dataframe(guerry_ds) - >>> LJC_MV = Local_Join_Counts_MV(connectivity=w).fit([guerry_ds['infq5'], guerry_ds['donq5'], guerry_ds['suic5']]) + >>> LJC_MV = Local_Join_Counts_MV( + ... connectivity=w + ... ).fit([guerry_ds['infq5'], guerry_ds['donq5'], guerry_ds['suic5']]) >>> LJC_MV.LJC >>> LJC_MV.p_sim """ @@ -114,10 +112,6 @@ def fit(self, variables, n_jobs=1, permutations=999): self.variables = np.array(variables, dtype="float") - keep_simulations = self.keep_simulations - n_jobs = self.n_jobs - seed = self.seed - # Need to ensure that the product is an # np.array() of dtype='float' for numba self.ext = np.array(np.prod(np.vstack(variables), axis=0), dtype="float") @@ -159,7 +153,7 @@ def _statistic(variables, w): # focal and neighbor values == 1 focal_all = np.array(np.all(np.dstack(focal) == 1, axis=2)) neighbor_all = np.array(np.all(np.dstack(neighbor) == 1, axis=2)) - MCLC = (focal_all == True) & (neighbor_all == True) + MCLC = (focal_all) & (neighbor_all) # Convert list of True/False to boolean array # and unlist (necessary for building pd.DF) MCLC = list(MCLC * 1) @@ -183,7 +177,6 @@ def _statistic(variables, w): @_njit(fastmath=True) def _ljc_mv(i, z, permuted_ids, weights_i, scaling): - self_weight = weights_i[0] other_weights = weights_i[1:] zi, zrand = _prepare_univariate(i, z, permuted_ids, other_weights) return zi * (zrand @ other_weights) diff --git a/esda/lee.py b/esda/lee.py index 8426aa44..d2aeb480 100644 --- a/esda/lee.py +++ b/esda/lee.py @@ -1,14 +1,10 @@ import numpy from scipy import sparse +from sklearn import preprocessing, utils from sklearn.base import BaseEstimator -from sklearn import preprocessing -from sklearn import utils -from .crand import ( - crand as _crand_plus, - njit as _njit, - _prepare_univariate, - _prepare_bivariate, -) + +from .crand import _prepare_bivariate +from .crand import njit as _njit class Spatial_Pearson(BaseEstimator): @@ -49,7 +45,7 @@ def fit(self, x, y): """ bivariate spatial pearson's R based on Eq. 18 of :cite:`Lee2001`. - L = \dfrac{Z^T (V^TV) Z}{1^T (V^TV) 1} + L = \\dfrac{Z^T (V^TV) Z}{1^T (V^TV) 1} Parameters ---------- @@ -79,10 +75,6 @@ def fit(self, x, y): self.connectivity = sparse.eye(Z.shape[0]) self.association_ = self._statistic(Z, self.connectivity) - standard_connectivity = sparse.csc_matrix( - self.connectivity / self.connectivity.sum(axis=1) - ) - if self.permutations is None: return self elif self.permutations < 1: @@ -154,22 +146,24 @@ def fit(self, x, y): bivariate local pearson's R based on Eq. 22 in Lee (2001), using site-wise conditional randomization from Moran_Local_BV. - L_i = \dfrac{ - n \cdot - \Big[\big(\sum_i w_{ij}(x_j - \bar{x})\big) - \big(\sum_i w_{ij}(y_j - \bar{y})\big) \Big] - } - { - \sqrt{\sum_i (x_i - \bar{x})^2} - \sqrt{\sum_i (y_i - \bar{y})^2}} - = \dfrac{ - n \cdot - (\tilde{x}_j - \bar{x}) - (\tilde{y}_j - \bar{y}) - } - { - \sqrt{\sum_i (x_i - \bar{x})^2} - \sqrt{\sum_i (y_i - \bar{y})^2}} + .. 
math:: + + L_i = \\dfrac{ + n \\cdot + \\Big[\big(\\sum_i w_{ij}(x_j - \bar{x})\big) + \big(\\sum_i w_{ij}(y_j - \bar{y})\big) \\Big] + } + { + \\sqrt{\\sum_i (x_i - \bar{x})^2} + \\sqrt{\\sum_i (y_i - \bar{y})^2}} + = \\dfrac{ + n \\cdot + (\tilde{x}_j - \bar{x}) + (\tilde{y}_j - \bar{y}) + } + { + \\sqrt{\\sum_i (x_i - \bar{x})^2} + \\sqrt{\\sum_i (y_i - \bar{y})^2}} Lee, Sang Il. (2001), "Developing a bivariate spatial association measure: An integration of Pearson's r and @@ -207,7 +201,7 @@ def fit(self, x, y): max_neighbors = (standard_connectivity != 0).sum(axis=1).max() random_ids = numpy.array( [ - numpy.random.permutation(n - 1)[0 : max_neighbors + 1] + numpy.random.permutation(n - 1)[0 : max_neighbors + 1] # noqa 208 for i in range(self.permutations) ] ) diff --git a/esda/losh.py b/esda/losh.py index 1087d040..181d5bce 100644 --- a/esda/losh.py +++ b/esda/losh.py @@ -1,9 +1,9 @@ -import numpy as np import warnings -from scipy import sparse + +import libpysal as lp +import numpy as np from scipy import stats from sklearn.base import BaseEstimator -import libpysal as lp class LOSH(BaseEstimator): @@ -88,17 +88,21 @@ def fit(self, y, a=2): if self.inference is None: return self - elif self.inference == 'chi-square': + elif self.inference == "chi-square": if a != 2: - warnings.warn(f'Chi-square inference assumes that a=2, but \ - a={a}. This means the inference will be invalid!') + warnings.warn( + f"Chi-square inference assumes that a=2, but \ + a={a}. This means the inference will be invalid!" + ) else: - dof = 2/self.VarHi - Zi = (2*self.Hi)/self.VarHi + dof = 2 / self.VarHi + Zi = (2 * self.Hi) / self.VarHi self.pval = 1 - stats.chi2.cdf(Zi, dof) else: - raise NotImplementedError(f'The requested inference method \ - ({self.inference}) is not currently supported!') + raise NotImplementedError( + f"The requested inference method \ + ({self.inference}) is not currently supported!" + ) return self @@ -113,22 +117,23 @@ def _statistic(y, w, a): rowsum = np.array(w.sparse.sum(axis=1)).flatten() # Calculate spatial mean - ylag = lp.weights.lag_spatial(w, y)/rowsum + ylag = lp.weights.lag_spatial(w, y) / rowsum # Calculate and adjust residuals based on multiplier - yresid = abs(y-ylag)**a + yresid = abs(y - ylag) ** a # Calculate denominator of Hi equation denom = np.mean(yresid) * np.array(rowsum) # Carry out final Hi calculation Hi = lp.weights.lag_spatial(w, yresid) / denom # Calculate average of residuals yresid_mean = np.mean(yresid) + # Calculate VarHi n = len(y) squared_rowsum = np.asarray(w.sparse.multiply(w.sparse).sum(axis=1)).flatten() - - VarHi = ((n-1)**-1) * \ - (denom**-2) * \ - ((np.sum(yresid**2)/n) - yresid_mean**2) * \ - ((n*squared_rowsum) - (rowsum**2)) + term1 = ((n - 1) ** -1) + term2 = (denom**-2) + term3 = ((np.sum(yresid**2) / n) - yresid_mean**2) + term4 = ((n * squared_rowsum) - (rowsum**2)) + VarHi = term1 * term2 * term3 * term4 return (Hi, ylag, yresid, VarHi) diff --git a/esda/map_comparison.py b/esda/map_comparison.py index d4f89e1c..6cd3338c 100644 --- a/esda/map_comparison.py +++ b/esda/map_comparison.py @@ -1,4 +1,5 @@ -import numpy, pandas +import numpy +import pandas from scipy.special import entr try: @@ -27,7 +28,8 @@ def _cast(collection): Cast a collection to a pygeos geometry array. """ try: - import pygeos, geopandas + import geopandas + import pygeos except (ImportError, ModuleNotFoundError) as exception: raise type(exception)( "pygeos and geopandas are required for map comparison statistics." 
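The VarHi rework in esda/losh.py above only splits the variance expression into four named factors; a minimal numpy sketch, using made-up toy arrays for yresid, rowsum, squared_rowsum and denom (none of these values come from the patch), can confirm that the four-term product reproduces the original single expression:

# Sketch, not part of the patch: check that the term1..term4 split of VarHi
# in esda/losh.py is purely cosmetic. All arrays below are made-up toy data.
import numpy as np

rng = np.random.default_rng(0)
n = 5
yresid = rng.random(n)                # stand-in for |y - ylag| ** a
rowsum = rng.random(n) + 1.0          # stand-in for row sums of the weights matrix
squared_rowsum = rng.random(n) + 1.0  # stand-in for row sums of the squared weights
yresid_mean = np.mean(yresid)
denom = yresid_mean * rowsum

# original single-expression form
var_old = (
    ((n - 1) ** -1)
    * (denom ** -2)
    * ((np.sum(yresid ** 2) / n) - yresid_mean ** 2)
    * ((n * squared_rowsum) - (rowsum ** 2))
)

# refactored four-term form from the patch
term1 = (n - 1) ** -1
term2 = denom ** -2
term3 = (np.sum(yresid ** 2) / n) - yresid_mean ** 2
term4 = (n * squared_rowsum) - (rowsum ** 2)
var_new = term1 * term2 * term3 * term4

assert np.allclose(var_old, var_new)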
diff --git a/esda/mixture_smoothing.py b/esda/mixture_smoothing.py index f9da6aff..66a86599 100644 --- a/esda/mixture_smoothing.py +++ b/esda/mixture_smoothing.py @@ -6,12 +6,18 @@ in CAMAN R package that is originally written by Peter Schlattmann. """ -__author__ = "Myunghwa Hwang , Luc Anselin , Serge Rey , " + "Luc Anselin , " + "Serge Rey >> mixture = NP_Mixture_Smoother(e,b) - extracting the smoothed rates through the property r of the NP_Mixture_Smoother instance + extracting the smoothed rates through the property + r of the NP_Mixture_Smoother instance >>> mixture.r array([0.10982278, 0.03445531, 0.11018404, 0.11018604]) @@ -111,20 +118,20 @@ class NP_Mixture_Smoother(object): >>> mixture.getRateEstimates() (array([0.0911574, 0.0911574, 0.0911574, 0.0911574]), array([1, 1, 1, 1])) - """ + """ # noqa E501 - def __init__(self, e, b, k=50, acc=1.E-7, numiter=5000, limit=0.01): + def __init__(self, e, b, k=50, acc=1.0e-7, numiter=5000, limit=0.01): self.e = np.asarray(e).flatten() self.b = np.asarray(b).flatten() self.n = len(e) - self.w = 1. / self.n + self.w = 1.0 / self.n self.k = k self.acc = acc self.numiter = numiter self.limit = limit r = self.mixalg() - self.p = r['p'] - self.t = r['grid'] + self.p = r["p"] + self.t = r["grid"] self.r, self.category = self.getRateEstimates() def getSeed(self): @@ -133,7 +140,7 @@ def getSeed(self): r_diff = r_max - r_min step = r_diff / (self.k - 1) grid = np.arange(r_min, r_max + step, step) - p = np.ones(self.k) * 1. / self.k + p = np.ones(self.k) * 1.0 / self.k return p, grid def getMixedProb(self, grid): @@ -146,11 +153,11 @@ def getMixedProb(self, grid): def getGradient(self, mix, p): mix_p = mix * p mix_den = mix_p.sum(axis=1) - obs_id = mix_den > 1.E-13 + obs_id = mix_den > 1.0e-13 for i in range(self.k): mix_den_len = len(mix_den) - if (mix_den > 1.E-13).sum() == mix_den_len: - mix_p[:, i] = (1. / mix_den_len) * mix[:, i] / mix_den + if (mix_den > 1.0e-13).sum() == mix_den_len: + mix_p[:, i] = (1.0 / mix_den_len) * mix[:, i] / mix_den gradient = [] for i in range(self.k): gradient.append(mix_p[:, i][obs_id].sum()) @@ -164,39 +171,44 @@ def getMaxGradient(self, gradient): return (grad_max, grad_max_inx) def getMinGradient(self, gradient, p): - p_fil = p > 1.E-8 + p_fil = p > 1.0e-8 grad_fil = gradient[p_fil] grad_min = grad_fil.min() grad_min_inx = np.where(p_fil)[0][grad_fil.argmin()] - if grad_min >= 1.E+7: - return (1.E+7, 1) + if grad_min >= 1.0e7: + return (1.0e7, 1) return (grad_min, grad_min_inx) def getStepsize(self, mix_den, ht): - mix_den_fil = np.fabs(mix_den) > 1.E-7 + ############################################################################## + # Something seems off in this function + # - a & b are defined twice + # - 4 defined, but unused, variables (commented out) + ############################################################################## + mix_den_fil = np.fabs(mix_den) > 1.0e-7 a = ht[mix_den_fil] / mix_den[mix_den_fil] b = 1.0 + a - b_fil = np.fabs(b) > 1.E-7 w = self.w - sl = w * ht[b_fil] / b[b_fil] - s11 = sl.sum() - s0 = (w * ht).sum() + # b_fil = np.fabs(b) > 1.0e-7 + # sl = w * ht[b_fil] / b[b_fil] + # s11 = sl.sum() + # s0 = (w * ht).sum() - step, oldstep = 0., 0. + step, oldstep = 0.0, 0.0 for i in range(50): - grad1, grad2 = 0., 0. 
+ grad1, grad2 = 0.0, 0.0 for j in range(self.n): a = mix_den[j] + step * ht[j] - if math.fabs(a) > 1.E-7: + if math.fabs(a) > 1.0e-7: b = ht[j] / a grad1 = grad1 + w * b grad2 = grad2 - w * b * b - if math.fabs(grad2) > 1.E-10: + if math.fabs(grad2) > 1.0e-10: step = step - grad1 / grad2 if oldstep > 1.0 and step > oldstep: - step = 1. + step = 1.0 break - if grad1 < 1.E-7: + if grad1 < 1.0e-7: break oldstep = step if step > 1.0: @@ -209,36 +221,49 @@ def vem(self, mix, p, grid): grad, mix_den = self.getGradient(mix, p) grad_max, grad_max_inx = self.getMaxGradient(grad) grad_min, grad_min_inx = self.getMinGradient(grad, p) - ht = (mix[:, grad_max_inx] - mix[:, grad_min_inx] - ) * p[grad_min_inx] + ht = (mix[:, grad_max_inx] - mix[:, grad_min_inx]) * p[grad_min_inx] st = self.getStepsize(mix_den, ht) xs = st * p[grad_min_inx] p[grad_min_inx] = p[grad_min_inx] - xs p[grad_max_inx] = p[grad_max_inx] + xs if (grad_max - 1.0) < self.acc or it == (self.numiter - 1): - res = {'k': self.k, 'accuracy': grad_max - 1.0, 'p': p, 'grid': grid, 'gradient': grad, 'mix_den': mix_den} + res = { + "k": self.k, + "accuracy": grad_max - 1.0, + "p": p, + "grid": grid, + "gradient": grad, + "mix_den": mix_den, + } break return res def update(self, p, grid): - p_inx = p > 1.E-3 + p_inx = p > 1.0e-3 new_p = p[p_inx] new_grid = grid[p_inx] self.k = len(new_p) return new_p, new_grid def em(self, nstep, grid, p): - l = self.k - 1 + l = self.k - 1 # noqa E741 w, n, e, b = self.w, self.n, self.e, self.b if self.k == 1: s11 = (w * b / np.ones(n)).sum() s12 = (w * e / np.ones(n)).sum() grid[l] = s11 / s12 - p[l] = 1. + p[l] = 1.0 mix = self.getMixedProb(grid) grad, mix_den = self.getGradient(mix, p) grad_max, grad_max_inx = self.getMaxGradient(grad) - return {'accuracy': math.fabs(grad_max - 1), 'k': self.k, 'p': p, 'grid': grid, 'gradient': grad, 'mix_den': mix_den} + return { + "accuracy": math.fabs(grad_max - 1), + "k": self.k, + "p": p, + "grid": grid, + "gradient": grad, + "mix_den": mix_den, + } else: res = {} for counter in range(nstep): @@ -246,17 +271,28 @@ def em(self, nstep, grid, p): grad, mix_den = self.getGradient(mix, p) p = p * grad su = p[:-1].sum() - p[l] = 1. 
- su + p[l] = 1.0 - su for j in range(self.k): - mix_den_fil = mix_den > 1.E-10 + mix_den_fil = mix_den > 1.0e-10 f_len = len(mix_den_fil) - s11 = (w * e[mix_den_fil] / np.ones(f_len) * mix[mix_den_fil, j] / mix_den[mix_den_fil]).sum() - s12 = (w * b[mix_den_fil] * (mix[mix_den_fil, j] / np.ones(f_len)) / mix_den[mix_den_fil]).sum() - if s12 > 1.E-12: + ones = np.ones(f_len) + mdf_j = mix[mix_den_fil, j] + _mdf = mix_den[mix_den_fil] + s11 = (w * e[mix_den_fil] / ones * mdf_j / _mdf).sum() + s12 = (w * b[mix_den_fil] / ones * mdf_j / _mdf).sum() + if s12 > 1.0e-12: grid[j] = s11 / s12 grad_max, grad_max_inx = self.getMaxGradient(grad) - res = {'accuracy': math.fabs(grad_max - 1.), 'step': counter + 1, 'k': self.k, 'p': p, 'grid': grid, 'gradient': grad, 'mix_den': mix_den} - if res['accuracy'] < self.acc and counter > 10: + res = { + "accuracy": math.fabs(grad_max - 1.0), + "step": counter + 1, + "k": self.k, + "p": p, + "grid": grid, + "gradient": grad, + "mix_den": mix_den, + } + if res["accuracy"] < self.acc and counter > 10: break return res @@ -266,7 +302,7 @@ def getLikelihood(self, mix_den): return r def combine(self, res): - p, grid, k = res['p'], res['grid'], self.k + p, grid, k = res["p"], res["grid"], self.k diff = np.fabs(grid[:-1] - grid[1:]) bp_seeds = (diff >= self.limit).nonzero()[0] + 1 if k - len(bp_seeds) > 1: @@ -279,26 +315,31 @@ def combine(self, res): bp.append(bp_seeds[0]) for i in range(1, len(bp_seeds)): if bp_seeds[i] - bp_seeds[i - 1] > 1: - bp.append(a[i]) + ############################################################## + # NEEDS ATTENTION + # `a` is not defined anywhere... what is it? + # -- seems like the condition is never met + # (bp_seeds[i] - bp_seeds[i - 1] > 1) + bp.append(a[i]) # noqa F821 + ############################################################## new_grid, new_p = [], [] for i in range(len(bp) - 1): new_grid.append(grid[bp[i]]) - new_p.append(p[bp[i]:bp[i + 1]].sum()) - self.k = new_k = len(new_p) + new_p.append(p[bp[i] : bp[i + 1]].sum()) # noqa E203 + self.k = len(new_p) new_grid, new_p = np.array(new_grid), np.array(new_p) mix = self.getMixedProb(new_grid) grad, mix_den = self.getGradient(mix, new_p) res = self.em(1, new_grid, new_p) if res is not None: - res['likelihood'] = self.getLikelihood(mix_den) + res["likelihood"] = self.getLikelihood(mix_den) return res def mixalg(self): - e, b, k, n = self.e, self.b, self.k, self.n p, grid = self.getSeed() mix = self.getMixedProb(grid) vem_res = self.vem(mix, p, grid) - p, grid, k = vem_res['p'], vem_res['grid'], vem_res['k'] + p, grid = vem_res["p"], vem_res["grid"] n_p, n_g = self.update(p, grid) em_res = self.em(self.numiter, n_g, n_p) com_res = self.combine(em_res) diff --git a/esda/moran.py b/esda/moran.py index 5bc85316..6308664a 100644 --- a/esda/moran.py +++ b/esda/moran.py @@ -2,23 +2,24 @@ Moran's I Spatial Autocorrelation Statistics """ -__author__ = "Sergio J. Rey , \ - Dani Arribas-Bel , \ - Levi John Wolf " -from libpysal.weights.spatial_lag import lag_spatial as slag -from .smoothing import assuncao_rate -from .tabular import _univariate_handler, _bivariate_handler -from .crand import ( - crand as _crand_plus, - njit as _njit, - _prepare_univariate, - _prepare_bivariate, +__author__ = ( + "Sergio J. 
Rey , " + "Dani Arribas-Bel , " + "Levi John Wolf " ) -from warnings import warn, simplefilter -from scipy import sparse -import scipy.stats as stats + +from warnings import simplefilter + import numpy as np -import tempfile +import scipy.stats as stats +from libpysal.weights.spatial_lag import lag_spatial as slag +from scipy import sparse + +from .crand import _prepare_univariate +from .crand import crand as _crand_plus +from .crand import njit as _njit +from .smoothing import assuncao_rate +from .tabular import _bivariate_handler, _univariate_handler __all__ = [ "Moran", @@ -162,7 +163,7 @@ def __init__( self.w = w self.permutations = permutations self.__moments() - self.I = self.__calc(self.z) + self.I = self.__calc(self.z) # noqa E741 self.z_norm = (self.I - self.EI) / self.seI_norm self.z_rand = (self.I - self.EI) / self.seI_rand @@ -189,7 +190,7 @@ def __init__( self.p_sim = (larger + 1.0) / (permutations + 1.0) self.EI_sim = sim.sum() / permutations self.seI_sim = np.array(sim).std() - self.VI_sim = self.seI_sim ** 2 + self.VI_sim = self.seI_sim**2 self.z_sim = (self.I - self.EI_sim) / self.seI_sim if self.z_sim > 0: self.p_z_sim = 1 - stats.norm.cdf(self.z_sim) @@ -219,8 +220,8 @@ def __moments(self): self.seI_norm = self.VI_norm ** (1 / 2.0) # variance under randomization - xd4 = z ** 4 - xd2 = z ** 2 + xd4 = z**4 + xd2 = z**2 k_num = xd4.sum() / n k_den = (xd2.sum() / n) ** 2 k = k_num / k_den @@ -250,32 +251,32 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - cols : string or list of string - name or list of names of columns to use to compute the statistic - w : pysal weights object - a weights object aligned with the dataframe. If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, the derived columns will be named - 'column_moran' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the Moran statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - Moran statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the Moran statistic. + df : pandas.DataFrame + a pandas dataframe with a geometry column + cols : string or list of string + name or list of names of columns to use to compute the statistic + w : pysal weights object + a weights object aligned with the dataframe. If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe inplace or to + return a series contaning the results of the computation. If + operating inplace, the derived columns will be named 'column_moran' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the Moran statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the + Moran statistic + **stat_kws : dict + options to pass to the underlying statistic. For this, see the + documentation for the Moran statistic. Returns -------- - If inplace, None, and operation is conducted on dataframe in memory. Otherwise, - returns a copy of the dataframe with the relevant columns attached. 
+ If inplace, None, and operation is conducted on dataframe + in memory. Otherwise, returns a copy of the dataframe with + the relevant columns attached. """ return _univariate_handler( @@ -411,7 +412,7 @@ def __init__(self, x, y, w, transformation="r", permutations=PERMUTATIONS): self.den = n - 1.0 # zx'zx = zy'zy = n-1 w.transform = transformation self.w = w - self.I = self.__calc(zy) + self.I = self.__calc(zy) # noqa E741 if permutations: nrp = np.random.permutation sim = [self.__calc(nrp(zy)) for i in range(permutations)] @@ -423,7 +424,7 @@ def __init__(self, x, y, w, transformation="r", permutations=PERMUTATIONS): self.p_sim = (larger + 1.0) / (permutations + 1.0) self.EI_sim = sim.sum() / permutations self.seI_sim = np.array(sim).std() - self.VI_sim = self.seI_sim ** 2 + self.VI_sim = self.seI_sim**2 self.z_sim = (self.I - self.EI_sim) / self.seI_sim if self.z_sim > 0: self.p_z_sim = 1 - stats.norm.cdf(self.z_sim) @@ -457,39 +458,39 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - X : list of strings - column name or list of column names to use as X values to compute - the bivariate statistic. If no Y is provided, pairwise comparisons - among these variates are used instead. - Y : list of strings - column name or list of column names to use as Y values to compute - the bivariate statistic. if no Y is provided, pariwise comparisons - among the X variates are used instead. - w : pysal weights object - a weights object aligned with the dataframe. If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, the derived columns will be named - 'column_moran_local' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the Moran_BV statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - Moran_BV statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the Moran_BV statistic. - + df : pandas.DataFrame + a pandas dataframe with a geometry column + X : list of strings + column name or list of column names to use as X values to compute + the bivariate statistic. If no Y is provided, pairwise comparisons + among these variates are used instead. + Y : list of strings + column name or list of column names to use as Y values to compute + the bivariate statistic. if no Y is provided, pariwise comparisons + among the X variates are used instead. + w : pysal weights object + a weights object aligned with the dataframe. If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe inplace or to + return a series contaning the results of the computation. If + operating inplace, the derived columns will be named + 'column_moran_local' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the Moran_BV statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the + Moran_BV statistic + **stat_kws : keyword arguments + options to pass to the underlying statistic. For this, see the + documentation for the Moran_BV statistic. 
Returns -------- - If inplace, None, and operation is conducted on dataframe in memory. Otherwise, - returns a copy of the dataframe with the relevant columns attached. + If inplace, None, and operation is conducted on dataframe + in memory. Otherwise, returns a copy of the dataframe with + the relevant columns attached. """ return _bivariate_handler( @@ -587,10 +588,11 @@ def _Moran_BV_Matrix_array(variables, w, permutations=0, varnames=None): """ Base calculation for MORAN_BV_Matrix """ + + k = len(variables) if varnames is None: varnames = ["x{}".format(i) for i in range(k)] - k = len(variables) rk = list(range(0, k - 1)) results = {} for i in rk: @@ -753,38 +755,39 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - events : string or list of strings - one or more names where events are stored - populations : string or list of strings - one or more names where the populations corresponding to the - events are stored. If one population column is provided, it is - used for all event columns. If more than one population column - is provided but there is not a population for every event - column, an exception will be raised. - w : pysal weights object - a weights object aligned with the dataframe. If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, the derived columns will be named - 'column_moran_rate' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the Moran_Rate statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - Moran_Rate statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the Moran_Rate statistic. + df : pandas.DataFrame + a pandas dataframe with a geometry column + events : string or list of strings + one or more names where events are stored + populations : string or list of strings + one or more names where the populations corresponding to the + events are stored. If one population column is provided, it is + used for all event columns. If more than one population column + is provided but there is not a population for every event + column, an exception will be raised. + w : pysal weights object + a weights object aligned with the dataframe. If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe inplace or to + return a series contaning the results of the computation. If + operating inplace, the derived columns will be named + 'column_moran_rate' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the Moran_Rate statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the + Moran_Rate statistic + **stat_kws : keyword arguments + options to pass to the underlying statistic. For this, see the + documentation for the Moran_Rate statistic. Returns -------- - If inplace, None, and operation is conducted on dataframe in memory. Otherwise, - returns a copy of the dataframe with the relevant columns attached. + If inplace, None, and operation is conducted on dataframe + in memory. 
Otherwise, returns a copy of the dataframe with + the relevant columns attached. """ if not inplace: @@ -884,11 +887,12 @@ class Moran_Local(object): seed : None/int Seed to ensure reproducibility of conditional randomizations. Must be set here, and not outside of the function, since numba - does not correctly interpret external seeds nor numpy.random.RandomState instances. + does not correctly interpret external seeds nor + numpy.random.RandomState instances. island_weight: - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. If 0, then - the lag is always zero for islands. + value to use as a weight for the "fake" neighbor for every island. + If numpy.nan, will propagate to the final local statistic depending + on the `stat_func`. If 0, then the lag is always zero for islands. Attributes ---------- @@ -1076,12 +1080,9 @@ def __quads(self): np = (1 - zp) * lp nn = (1 - zp) * (1 - lp) pn = zp * (1 - lp) - self.q = ( - self.quads[0] * pp - + self.quads[1] * np - + self.quads[2] * nn - + self.quads[3] * pn - ) + + q0, q1, q2, q3 = self.quads + self.q = (q0 * pp) + (q1 * np) + (q2 * nn) + (q3 * pn) def __moments(self): W = self.w.sparse @@ -1091,40 +1092,43 @@ def __moments(self): m2 = (z * z).sum() / n wi = np.asarray(W.sum(axis=1)).flatten() wi2 = np.asarray(W.multiply(W).sum(axis=1)).flatten() + # --------------------------------------------------------- # Conditional randomization null, Sokal 1998, Eqs. A7 & A8 # assume that division is as written, so that # a - b / (n - 1) means a - (b / (n-1)) # --------------------------------------------------------- - expectation = -(z ** 2 * wi) / ((n - 1) * m2) - variance = ( - (z / m2) ** 2 - * (n / (n - 2)) - * (wi2 - (wi ** 2 / (n - 1))) - * (m2 - (z ** 2 / (n - 1))) - ) + expectation = -(z**2 * wi) / ((n - 1) * m2) + var_term1 = (z / m2) ** 2 + var_term2 = (n / (n - 2)) + var_term3 = (wi2 - (wi**2 / (n - 1))) + var_term4 = (m2 - (z**2 / (n - 1))) + variance = var_term1 * var_term2 * var_term3 * var_term4 self.EIc = expectation self.VIc = variance + # --------------------------------------------------------- # Total randomization null, Sokal 1998, Eqs. A3 & A4* # --------------------------------------------------------- - m4 = z ** 4 / n - b2 = m4 / m2 ** 2 + m4 = z**4 / n + b2 = m4 / m2**2 expectation = -wi / (n - 1) + # assume that "avoiding identical subscripts" in :cite:`Anselin1995` - # includes i==h and i==k, we can use the form due to :cite:`sokal1998local` below. + # includes i==h and i==k, we can use the form due to + # :cite:`sokal1998local` below. + # wikh = _wikh_fast(W) # variance_anselin = (wi2 * (n - b2)/(n-1) # + 2*wikh*(2*b2 - n) / ((n-1)*(n-2)) # - wi**2/(n-1)**2) self.EI = expectation - self.VI = ( - wi2 * (n - b2) / (n - 1) - + (wi ** 2 - wi2) * (2 * b2 - n) / ((n - 1) * (n - 2)) - - (-wi / (n - 1)) ** 2 - ) + n1 = n - 1 + self.VI = wi2 * (n - b2) / n1 + self.VI += (wi**2 - wi2) * (2 * b2 - n) / (n1 * (n - 2)) + self.VI -= (-wi / n1) ** 2 @property def _statistic(self): @@ -1140,32 +1144,33 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - cols : string or list of string - name or list of names of columns to use to compute the statistic - w : pysal weights object - a weights object aligned with the dataframe. 
If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, the derived columns will be named - 'column_moran_local' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the Moran_Local statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - Moran_Local statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the Moran_Local statistic. + df : pandas.DataFrame + a pandas dataframe with a geometry column + cols : string or list of string + name or list of names of columns to use to compute the statistic + w : pysal weights object + a weights object aligned with the dataframe. If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe inplace or to + return a series contaning the results of the computation. If + operating inplace, the derived columns will be named + 'column_moran_local' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the Moran_Local statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the + Moran_Local statistic + **stat_kws : dict + options to pass to the underlying statistic. For this, see the + documentation for the Moran_Local statistic. Returns -------- - If inplace, None, and operation is conducted on dataframe in memory. Otherwise, - returns a copy of the dataframe with the relevant columns attached. + If inplace, None, and operation is conducted on dataframe + in memory. Otherwise, returns a copy of the dataframe with + the relevant columns attached. """ return _univariate_handler( @@ -1217,11 +1222,12 @@ class Moran_Local_BV(object): seed : None/int Seed to ensure reproducibility of conditional randomizations. Must be set here, and not outside of the function, since numba - does not correctly interpret external seeds nor numpy.random.RandomState instances. + does not correctly interpret external seeds nor + numpy.random.RandomState instances. island_weight: - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. If 0, then - the lag is always zero for islands. + value to use as a weight for the "fake" neighbor for every island. + If numpy.nan, will propagate to the final local statistic depending + on the `stat_func`. If 0, then the lag is always zero for islands. Attributes ---------- @@ -1373,12 +1379,9 @@ def __quads(self): np = (1 - zp) * lp nn = (1 - zp) * (1 - lp) pn = zp * (1 - lp) - self.q = ( - self.quads[0] * pp - + self.quads[1] * np - + self.quads[2] * nn - + self.quads[3] * pn - ) + + q0, q1, q2, q3 = self.quads + self.q = (q0 * pp) + (q1 * np) + (q2 * nn) + (q3 * pn) @property def _statistic(self): @@ -1402,39 +1405,39 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - X : list of strings - column name or list of column names to use as X values to compute - the bivariate statistic. If no Y is provided, pairwise comparisons - among these variates are used instead. 
- Y : list of strings - column name or list of column names to use as Y values to compute - the bivariate statistic. if no Y is provided, pariwise comparisons - among the X variates are used instead. - w : pysal weights object - a weights object aligned with the dataframe. If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, the derived columns will be named - 'column_moran_local_bv' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the Moran_Local_BV statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - Moran_Local_BV statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the Moran_Local_BV statistic. - + df : pandas.DataFrame + a pandas dataframe with a geometry column + X : list of strings + column name or list of column names to use as X values to compute + the bivariate statistic. If no Y is provided, pairwise comparisons + among these variates are used instead. + Y : list of strings + column name or list of column names to use as Y values to compute + the bivariate statistic. if no Y is provided, pariwise comparisons + among the X variates are used instead. + w : pysal weights object + a weights object aligned with the dataframe. If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe inplace or to + return a series contaning the results of the computation. If + operating inplace, the derived columns will be named + 'column_moran_local_bv' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the Moran_Local_BV statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the + Moran_Local_BV statistic + **stat_kws : dict + options to pass to the underlying statistic. For this, see the + documentation for the Moran_Local_BV statistic. Returns -------- - If inplace, None, and operation is conducted on dataframe in memory. Otherwise, - returns a copy of the dataframe with the relevant columns attached. + If inplace, None, and operation is conducted on dataframe + in memory. Otherwise, returns a copy of the dataframe with + the relevant columns attached. """ return _bivariate_handler( @@ -1492,9 +1495,9 @@ class Moran_Local_Rate(Moran_Local): Must be set here, and not outside of the function, since numba does not correctly interpret external seeds nor numpy.random.RandomState instances. island_weight: float - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. If 0, then - the lag is always zero for islands. + value to use as a weight for the "fake" neighbor for every island. + If numpy.nan, will propagate to the final local statistic depending + on the `stat_func`. If 0, then the lag is always zero for islands. 
Attributes ---------- @@ -1550,16 +1553,19 @@ class Moran_Local_Rate(Moran_Local): >>> e = np.array(f.by_col('SID79')) >>> b = np.array(f.by_col('BIR79')) >>> from esda.moran import Moran_Local_Rate - >>> lm = Moran_Local_Rate(e, b, w, transformation = "r", permutations = 99) + >>> lm = Moran_Local_Rate(e, b, w, transformation="r", permutations=99) >>> lm.q[:10] array([2, 4, 3, 1, 2, 1, 1, 4, 2, 4]) - >>> lm = Moran_Local_Rate(e, b, w, transformation = "r", permutations = 99, geoda_quads=True) + >>> lm = Moran_Local_Rate( + ... e, b, w, transformation = "r", permutations=99, geoda_quads=True + ) >>> lm.q[:10] array([3, 4, 2, 1, 3, 1, 1, 4, 3, 4]) Note random components result is slightly different values across architectures so the results have been removed from doctests and will be moved into unittests that are conditional on architectures + """ def __init__( @@ -1612,37 +1618,39 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - events : string or list of strings - one or more names where events are stored - populations : string or list of strings - one or more names where the populations corresponding to the - events are stored. If one population column is provided, it is - used for all event columns. If more than one population column - is provided but there is not a population for every event - column, an exception will be raised. - w : pysal weights object - a weights object aligned with the dataframe. If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, the derived columns will be named 'column_moran_local_rate' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the Moran_Local_Rate statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - Moran_Local_Rate statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the Moran_Local_Rate statistic. + df : pandas.DataFrame + a pandas dataframe with a geometry column + events : string or list of strings + one or more names where events are stored + populations : string or list of strings + one or more names where the populations corresponding to the + events are stored. If one population column is provided, it is + used for all event columns. If more than one population column + is provided but there is not a population for every event + column, an exception will be raised. + w : pysal weights object + a weights object aligned with the dataframe. If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe + inplace or to return a series contaning the results of + the computation. If operating inplace, the derived columns + will be named 'column_moran_local_rate' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the Moran_Local_Rate statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the + Moran_Local_Rate statistic + **stat_kws : dict + options to pass to the underlying statistic. For this, see the + documentation for the Moran_Local_Rate statistic. 
Returns -------- - If inplace, None, and operation is conducted on dataframe in memory. Otherwise, - returns a copy of the dataframe with the relevant columns attached. + If inplace, None, and operation is conducted on dataframe + in memory. Otherwise, returns a copy of the dataframe with + the relevant columns attached. """ if not inplace: @@ -1711,7 +1719,9 @@ def _wikh_fast(W, sokal_correction=False): """ This computes the outer product of weights for each observation. - w_{i(kh)} = \sum_{k \neq i}^n \sum_{h \neq i}^n w_ik * w_hk + .. math:: + + w_{i(kh)} = \\sum_{k \neq i}^n \\sum_{h \neq i}^n w_ik * w_hk If the :cite:`sokal1998local` version is used, then we also have h \neq k Since this version introduces a simplification in the expression @@ -1720,14 +1730,14 @@ def _wikh_fast(W, sokal_correction=False): Arguments --------- - W : scipy sparse matrix - a sparse matrix describing the spatial relationships - between observations. - sokal_correction: bool - Whether to avoid self-neighbors in the summation of weights. - If False (default), then the outer product of all weights - for observation i are used, regardless if they are of the form - w_hh or w_kk. + W : scipy sparse matrix + a sparse matrix describing the spatial relationships + between observations. + sokal_correction : bool + Whether to avoid self-neighbors in the summation of weights. + If False (default), then the outer product of all weights + for observation i are used, regardless if they are of the form + w_hh or w_kk. Returns ------- diff --git a/esda/shape.py b/esda/shape.py index 3f103a77..c45ee5b7 100644 --- a/esda/shape.py +++ b/esda/shape.py @@ -15,7 +15,8 @@ def _cast(collection): Cast a collection to a pygeos geometry array. """ try: - import pygeos, geopandas + import geopandas + import pygeos except (ImportError, ModuleNotFoundError) as exception: raise type(exception)("pygeos and geopandas are required for shape statistics.") @@ -66,7 +67,6 @@ def get_angles(collection, return_indices=False): exploded = pygeos.get_parts(ga) coords = pygeos.get_coordinates(exploded) n_coords_per_geom = pygeos.get_num_coordinates(exploded) - n_parts_per_geom = pygeos.get_num_geometries(exploded) angles = numpy.asarray(_get_angles(coords, n_coords_per_geom)) if return_indices: return angles, numpy.repeat( @@ -85,11 +85,9 @@ def _get_angles(points, n_coords_per_geom): """ # Start at the first point of the first geometry offset = int(0) - start = points[0] on_geom = 0 on_coord = 0 result = [] - n_points = len(points) while True: # if we're on the last point before the closure point, if on_coord == (n_coords_per_geom[on_geom] - 1): @@ -125,7 +123,7 @@ def _get_angles(points, n_coords_per_geom): def isoperimetric_quotient(collection): - """ + r""" The Isoperimetric quotient, defined as the ratio of a polygon's area to the area of the equi-perimeter circle. @@ -206,7 +204,7 @@ def diameter_ratio(collection, rotated=True): if rotated: box = pygeos.minimum_rotated_rectangle(ga) coords = pygeos.get_coordinates(box) - a, b, c, d = (coords[0::5], coords[1::5], coords[2::5], coords[3::5]) + a, b, _, d = (coords[0::5], coords[1::5], coords[2::5], coords[3::5]) widths = numpy.sqrt(numpy.sum((a - b) ** 2, axis=1)) heights = numpy.sqrt(numpy.sum((a - d) ** 2, axis=1)) else: @@ -264,9 +262,12 @@ def convex_hull_ratio(collection): def fractal_dimension(collection, support="hex"): """ - The fractal dimension of the boundary of a shape, assuming a given spatial support for the geometries. 
+ The fractal dimension of the boundary of a shape, assuming a given + spatial support for the geometries. - Note that this derivation assumes a specific ideal spatial support for the polygon, and is thus may not return valid results for complex or highly irregular geometries. + Note that this derivation assumes a specific ideal spatial support + for the polygon, and is thus may not return valid results for + complex or highly irregular geometries. """ ga = _cast(collection) P = pygeos.measurement.length(ga) @@ -279,7 +280,8 @@ def fractal_dimension(collection, support="hex"): return 2 * numpy.log(P / (2 * numpy.pi)) / numpy.log(A / numpy.pi) else: raise ValueError( - f"The support argument must be one of 'hex', 'circle', or 'square', but {support} was provided." + "The support argument must be one of 'hex', 'circle', or 'square', " + f"but {support} was provided." ) @@ -315,14 +317,16 @@ def squareness(collection): def rectangularity(collection): """ - Ratio of the area of the shape to the area of its minimum bounding rotated rectangle + Ratio of the area of the shape to the area + of its minimum bounding rotated rectangle Reveals a polygon’s degree of being curved inward. .. math:: \\frac{A}{A_{MBR}} - where :math:`A` is the area and :math:`A_{MBR}` is the area of minimum bounding + where :math:`A` is the area and :math:`A_{MBR}` + is the area of minimum bounding rotated rectangle. Notes @@ -414,7 +418,8 @@ def moment_of_inertia(collection): Thus, for constant unit mass at each boundary point, the MoI of this pointcloud is - \sum_i d_{i,c}^2 + .. math:: + \\sum_i d_{i,c}^2 where c is the centroid of the polygon @@ -441,7 +446,7 @@ def moa_ratio(collection): """ ga = _cast(collection) r = pygeos.measurement.length(ga) / (2 * numpy.pi) - return (numpy.pi * 0.5 * r ** 4) / second_areal_moment(ga) + return (numpy.pi * 0.5 * r**4) / second_areal_moment(ga) def nmi(collection): @@ -460,8 +465,9 @@ def second_areal_moment(collection): moment of area is actually the cross-moment of area between the X and Y dimensions: - I_xy = (1/24)\sum^{i=N}^{i=1} (x_iy_{i+1} + 2*x_iy_i + 2*x_{i+1}y_{i+1} + - x_{i+1}y_i)(x_iy_i - x_{i+1}y_i) + .. math:: + I_xy = (1/24)\\sum^{i=N}^{i=1} (x_iy_{i+1} + 2*x_iy_i + 2*x_{i+1}y_{i+1} + + x_{i+1}y_i)(x_iy_i - x_{i+1}y_i) where x_i, y_i is the current point and x_{i+1}, y_{i+1} is the next point, and where x_{n+1} = x_1, y_{n+1} = 1. @@ -483,7 +489,6 @@ def second_areal_moment(collection): for hole_ix in range(n_holes): hole = pygeos.get_coordinates(pygeos.get_interior_ring(ga, hole_ix)) result[i] -= _second_moa_ring(hole) - n_parts = pygeos.get_num_geometries(geometry) for part in pygeos.get_parts(geometry): result[i] += _second_moa_ring(pygeos.get_coordinates(part)) # must divide everything by 24 and flip if polygon is clockwise. 
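The cross second moment of area used by second_areal_moment accumulates one term per boundary edge; a minimal sketch, assuming a made-up unit-square ring and plain Python in place of the numba-compiled loop, illustrates the per-edge products that the patch names xtyh, xhyt, xtyt and xhyh:

# Sketch, not part of the patch: the per-edge accumulation behind the I_xy
# formula quoted above, written without numba. The ring (a closed unit
# square traversed counter-clockwise) is made-up illustration data.
import numpy as np

ring = np.array(
    [[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0], [0.0, 0.0]]
)

moi = 0.0
for (x_tail, y_tail), (x_head, y_head) in zip(ring[:-1], ring[1:]):
    # cross products between consecutive vertices, mirroring _second_moa_ring
    xtyh = x_tail * y_head
    xhyt = x_head * y_tail
    xtyt = x_tail * y_tail
    xhyh = x_head * y_head
    moi += (xtyh - xhyt) * (xtyh + 2 * xtyt + 2 * xhyh + xhyt)

# second_areal_moment divides the running sum by 24; for this unit square
# the result is 0.25, the product moment of area about the origin
print(moi / 24)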
@@ -500,12 +505,11 @@ def _second_moa_ring(points): for i in prange(len(points[:-1])): x_tail, y_tail = points[i] x_head, y_head = points[i + 1] - moi += (x_tail * y_head - x_head * y_tail) * ( - x_tail * y_head - + 2 * x_tail * y_tail - + 2 * x_head * y_head - + x_head * y_tail - ) + xtyh = x_tail * y_head + xhyt = x_head * y_tail + xtyt = x_tail * y_tail + xhyh = x_head * y_head + moi += (xtyh - xhyt) * (xtyh + 2 * xtyt + 2 * xhyh + xhyt) return moi diff --git a/esda/silhouettes.py b/esda/silhouettes.py index 2aa45f50..d732c6e9 100644 --- a/esda/silhouettes.py +++ b/esda/silhouettes.py @@ -1,26 +1,28 @@ +import warnings + import numpy as np -from scipy.sparse import csgraph as cg from scipy import sparse as sp +from scipy.sparse import csgraph as cg try: + import pandas as pd import sklearn.metrics as sk import sklearn.metrics.pairwise as skp - from sklearn.preprocessing import LabelEncoder - import pandas as pd + from sklearn.preprocessing import LabelEncoder # noqa F401 HAS_REQUIREMENTS = True -except ImportError as e: +except ImportError: HAS_REQUIREMENTS = False def _raise_initial_error(): missing = [] try: - import sklearn + import sklearn # noqa F401 except ImportError: missing.append("scikit-learn") try: - import pandas + import pandas # noqa F401 except ImportError: missing.append("pandas") raise ImportError( @@ -50,7 +52,8 @@ def path_silhouette( directed=False, ): """ - Compute a path silhouette for all observations :cite:`wolf2019geosilhouettes,Rousseeuw1987`. + Compute a path silhouette for all observations + :cite:`wolf2019geosilhouettes,Rousseeuw1987`. Parameters @@ -250,7 +253,8 @@ def path_silhouette( def boundary_silhouette(data, labels, W, metric=skp.euclidean_distances): """ - Compute the observation-level boundary silhouette score :cite:`wolf2019geosilhouettes`. + Compute the observation-level boundary silhouette + score :cite:`wolf2019geosilhouettes`. Parameters ---------- @@ -280,8 +284,8 @@ def boundary_silhouette(data, labels, W, metric=skp.euclidean_distances): So, instead of considering *all* clusters when finding the next-best-fit cluster, only clusters that `i` borders are considered. This is supposed to model the fact that, in spatially-constrained clustering, - observation i can only be reassigned from cluster c to cluster k if some observation - j neighbors i and also resides in k. + observation i can only be reassigned from cluster c to cluster k if + some observation j neighbors i and also resides in k. If an observation only neighbors its own cluster, i.e. is not on the boundary of a cluster, this value is zero. @@ -383,7 +387,10 @@ def silhouette_alist(data, labels, alist, indices=None, metric=skp.euclidean_dis containing `focal` id, `neighbor` id, and `label_focal`, and `label_neighbor`, this computes: - `d(i,label_neighbor) - d(i,label_focal) / (max(d(i,label_neighbor), d(i,label_focal)))` + .. math:: + + d(i,label_neighbor) - d(i,label_focal) + / (max(d(i,label_neighbor), d(i,label_focal))) Parameters ---------- @@ -465,17 +472,17 @@ def silhouette_alist(data, labels, alist, indices=None, metric=skp.euclidean_dis neighbor_mask = np.nonzero(neighbor_mask.values)[0] if len(neighbor_mask) == 0: sils.append(0) - warn( - "A link ({},{}) has been found to have an empty set of neighbors. " - " This may happen when a label assignment is missing for the neighbor unit." - " Check that no labels are missing.".format(row.focal, row.neighbor) + warnings.warn( + f"A link ({row.focal},{row.neighbor}) has been found to have " + "an empty set of neighbors. 
This may happen when a label " + "assignment is missing for the neighbor unit. " + "Check that no labels are missing." ) continue outer_distance = full_distances[i_Xc, neighbor_mask].mean() - sils.append( - (outer_distance - within_cluster) - / np.maximum(outer_distance, within_cluster) - ) + dist_diff = outer_distance - within_cluster + dist_max = np.maximum(outer_distance, within_cluster) + sils.append(dist_diff / dist_max) result["silhouette"] = sils return result.sort_values("focal").reset_index(drop=True) @@ -492,22 +499,22 @@ def nearest_label( Parameters ---------- data : (N,P) array to cluster on or DataFrame indexed on the same values as - that in alist.focal/alist.neighbor - labels: (N,) array containing classifications, indexed on the same values - as that in alist.focal/alist.neighbor - metric : callable, array, - a function that takes an argument (data) and returns the all-pairs - distances/dissimilarity between observations. + that in alist.focal/alist.neighbor + labels : (N,) array containing classifications, indexed on the same values + as that in alist.focal/alist.neighbor + metric : callable, array, + a function that takes an argument (data) and returns the all-pairs + distances/dissimilarity between observations. return_distance: bool - Whether to return the distance from the observation to its nearest - cluster in feature space. If True, the tuple of (nearest_label, dissim) - is returned. If False, only the nearest_label array is returned. + Whether to return the distance from the observation to its nearest + cluster in feature space. If True, the tuple of (nearest_label, dissim) + is returned. If False, only the nearest_label array is returned. keep_self: bool - whether to allow observations to use their current cluster as their - nearest label. If True, an observation's existing cluster assignment can - also be the cluster it is closest to. If False, an observation's existing - cluster assignment cannot be the cluster it is closest to. This would mean - the function computes the nearest *alternative* cluster. + whether to allow observations to use their current cluster as their + nearest label. If True, an observation's existing cluster assignment can + also be the cluster it is closest to. If False, an observation's existing + cluster assignment cannot be the cluster it is closest to. This would mean + the function computes the nearest *alternative* cluster. 
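For `silhouette_alist`, the per-link value is the contrast between an observation's mean feature distance to the neighbouring cluster and to its own cluster, scaled by the larger of the two, exactly as in the refactored `dist_diff / dist_max` lines. A minimal example on toy data (assumed, not taken from the library):

```python
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

# Toy data: five observations in two clusters; observation 0 (cluster 0)
# borders cluster 1 in the adjacency list.
data = np.array([[0.0], [0.2], [0.3], [2.0], [2.2]])
labels = np.array([0, 0, 0, 1, 1])

full_distances = euclidean_distances(data)
i = 0
own = (labels == labels[i]) & (np.arange(len(data)) != i)
within_cluster = full_distances[i, own].mean()           # mean distance to own cluster
outer_distance = full_distances[i, labels == 1].mean()   # mean distance to the bordering cluster

# per-link silhouette value, written as in the refactored hunk
dist_diff = outer_distance - within_cluster
dist_max = np.maximum(outer_distance, within_cluster)
print(dist_diff / dist_max)  # ~0.88: observation 0 is much closer to its own cluster
```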
Returns ------- @@ -539,7 +546,6 @@ def nearest_label( nearest_label_dissim = np.empty(labels.shape) for label in unique_labels: this_label_mask = labels == label - n_in_label = this_label_mask.sum() this_label_mask = np.nonzero(this_label_mask)[0] next_best_fit = np.ones(this_label_mask.shape) * np.inf next_best_label = np.empty(this_label_mask.shape, dtype=labels.dtype) diff --git a/esda/smaup.py b/esda/smaup.py index 127b4ed7..ba3f1647 100644 --- a/esda/smaup.py +++ b/esda/smaup.py @@ -13,7 +13,8 @@ class Smaup(object): - """S-maup: Statistical Test to Measure the Sensitivity to the Modifiable Areal Unit Problem + """S-maup: Statistical Test to Measure the Sensitivity + to the Modifiable Areal Unit Problem Parameters ---------- diff --git a/esda/smoothing.py b/esda/smoothing.py index 2f312e53..599fb1cc 100644 --- a/esda/smoothing.py +++ b/esda/smoothing.py @@ -1,4 +1,5 @@ from __future__ import division + """ Apply smoothing to rate computation @@ -12,24 +13,59 @@ """ -__author__ = "Myunghwa Hwang , David Folch , Luc Anselin , Serge Rey , " + "David Folch , " + "Luc Anselin , " + "Serge Rey = cumsum_threshold).nonzero()[0][0] if reordered_w[median_inx] == cumsum_threshold and len(d) - 1 > median_inx: - return np.sort(d)[median_inx:median_inx + 2].mean() + return np.sort(d)[median_inx : median_inx + 2].mean() # noqa E203 return np.sort(d)[median_inx] @@ -155,9 +191,9 @@ def sum_by_n(d, w, n): """ t = len(d) - h = t // n #must be floor! + h = t // n # must be floor! d = d * w - return np.array([sum(d[i: i + h]) for i in range(0, t, h)]) + return np.array([sum(d[i : i + h]) for i in range(0, t, h)]) # noqa E203 def crude_age_standardization(e, b, n): @@ -165,12 +201,13 @@ def crude_age_standardization(e, b, n): Parameters ---------- - e : array - (n*h, 1), event variable measured for each age group across n spatial units - b : array - (n*h, 1), population at risk variable measured for each age group across n spatial units - n : integer - the number of spatial units + e : array + (n*h, 1), event variable measured for each age group across n spatial units + b : array + (n*h, 1), population at risk variable measured for each age + group across n spatial units + n : integer + the number of spatial units Notes ----- @@ -218,16 +255,17 @@ def direct_age_standardization(e, b, s, n, alpha=0.05): Parameters ---------- - e : array - (n*h, 1), event variable measured for each age group across n spatial units - b : array - (n*h, 1), population at risk variable measured for each age group across n spatial units - s : array - (n*h, 1), standard population for each age group across n spatial units - n : integer - the number of spatial units - alpha : float - significance level for confidence interval + e : array + (n*h, 1), event variable measured for each age group across n spatial units + b : array + (n*h, 1), population at risk variable measured for each + age group across n spatial units + s : array + (n*h, 1), standard population for each age group across n spatial units + n : integer + the number of spatial units + alpha : float + significance level for confidence interval Notes ----- @@ -256,10 +294,11 @@ def direct_age_standardization(e, b, s, n, alpha=0.05): >>> b = np.array([1000, 1000, 1100, 900, 1000, 900, 1100, 900]) For direct age standardization, we also need the data for standard population. - Standard population is a reference population-at-risk (e.g., population distribution for the U.S.) 
- whose age distribution can be used as a benchmarking point for comparing age distributions - across regions (e.g., population distribution for Arizona and California). - Another array including standard population is created. + Standard population is a reference population-at-risk (e.g., population + distribution for the U.S.) whose age distribution can be used as a benchmarking + point for comparing age distributions across regions (e.g., population + distribution for Arizona and California). Another array including standard + population is created. >>> s = np.array([1000, 900, 1000, 900, 1000, 900, 1000, 900]) @@ -281,11 +320,11 @@ def direct_age_standardization(e, b, s, n, alpha=0.05): var_estimate = sum_by_n(e, np.square(age_weight), n) g_a = np.square(adjusted_r) / var_estimate g_b = var_estimate / adjusted_r - k = [age_weight[i:i + len(b) // n].max() for i in range(0, len(b), - len(b) // n)] + _b = len(b) + _rb = range(0, _b, _b // n) + k = [age_weight[i : i + _b // n].max() for i in _rb] # noqa E203 g_a_k = np.square(adjusted_r + k) / (var_estimate + np.square(k)) g_b_k = (var_estimate + np.square(k)) / (adjusted_r + k) - summed_b = sum_by_n(b, 1.0, n) res = [] for i in range(len(adjusted_r)): if adjusted_r[i] == 0: @@ -303,18 +342,21 @@ def indirect_age_standardization(e, b, s_e, s_b, n, alpha=0.05): Parameters ---------- - e : array - (n*h, 1), event variable measured for each age group across n spatial units - b : array - (n*h, 1), population at risk variable measured for each age group across n spatial units - s_e : array - (n*h, 1), event variable measured for each age group across n spatial units in a standard population - s_b : array - (n*h, 1), population variable measured for each age group across n spatial units in a standard population - n : integer - the number of spatial units - alpha : float - significance level for confidence interval + e : array + (n*h, 1), event variable measured for each age group across n spatial units + b : array + (n*h, 1), population at risk variable measured for each age + group across n spatial units + s_e : array + (n*h, 1), event variable measured for each age group across + n spatial units in a standard population + s_b : array + (n*h, 1), population variable measured for each age group across + n spatial units in a standard population + n : integer + the number of spatial units + alpha : float + significance level for confidence interval Notes ----- @@ -342,13 +384,14 @@ def indirect_age_standardization(e, b, s_e, s_b, n, alpha=0.05): >>> b = np.array([100, 100, 110, 90, 100, 90, 110, 90]) - For indirect age standardization, we also need the data for standard population and event. - Standard population is a reference population-at-risk (e.g., population distribution for the U.S.) - whose age distribution can be used as a benchmarking point for comparing age distributions - across regions (e.g., popoulation distribution for Arizona and California). - When the same concept is applied to the event variable, - we call it standard event (e.g., the number of cancer patients in the U.S.). - Two additional arrays including standard population and event are created. + For indirect age standardization, we also need the data for standard population + and event. Standard population is a reference population-at-risk (e.g., + population distribution for the U.S.) whose age distribution can be used as a + benchmarking point for comparing age distributions across regions (e.g., + popoulation distribution for Arizona and California). 
When the same concept is + applied to the event variable, we call it standard event (e.g., the number of + cancer patients in the U.S.). Two additional arrays including standard population + and event are created. >>> s_e = np.array([100, 45, 120, 100, 50, 30, 200, 80]) >>> s_b = np.array([1000, 900, 1000, 900, 1000, 900, 1000, 900]) @@ -384,16 +427,19 @@ def standardized_mortality_ratio(e, b, s_e, s_b, n): Parameters ---------- - e : array - (n*h, 1), event variable measured for each age group across n spatial units - b : array - (n*h, 1), population at risk variable measured for each age group across n spatial units - s_e : array - (n*h, 1), event variable measured for each age group across n spatial units in a standard population - s_b : array - (n*h, 1), population variable measured for each age group across n spatial units in a standard population - n : integer - the number of spatial units + e : array + (n*h, 1), event variable measured for each age group across n spatial units + b : array + (n*h, 1), population at risk variable measured for each age + group across n spatial units + s_e : array + (n*h, 1), event variable measured for each age group across + n spatial units in a standard population + s_b : array + (n*h, 1), population variable measured for each age group + across n spatial units in a standard population + n : integer + the number of spatial units Notes ----- @@ -452,14 +498,14 @@ def choynowski(e, b, n, threshold=None): Parameters ---------- - e : array(n*h, 1) - event variable measured for each age group across n spatial units - b : array(n*h, 1) - population at risk variable measured for each age group across n spatial units - n : integer - the number of spatial units - threshold : float - Returns zero for any p-value greater than threshold + e : array(n*h, 1) + event variable measured for each age group across n spatial units + b : array(n*h, 1) + population at risk variable measured for each age group across n spatial units + n : integer + the number of spatial units + threshold : float + Returns zero for any p-value greater than threshold Notes ----- @@ -563,6 +609,7 @@ def assuncao_rate(e, b): ebi_v = ebi_a + ebi_b / b return (y - ebi_b) / np.sqrt(ebi_v) + class _Smoother(object): """ This is a helper class that implements things that all smoothers should do. @@ -572,11 +619,12 @@ class _Smoother(object): maybe headbanging triples), since they're literally only inits + one attribute. """ + def __init__(self): pass @classmethod - def by_col(cls, df, e,b, inplace=False, **kwargs): + def by_col(cls, df, e, b, inplace=False, **kwargs): """ Compute smoothing by columns in a dataframe. @@ -616,16 +664,19 @@ def by_col(cls, df, e,b, inplace=False, **kwargs): try: assert len(e) == len(b) except AssertionError: - raise ValueError('There is no one-to-one mapping between event' - ' variable and population at risk variable!') - for ei, bi in zip(e,b): + raise ValueError( + "There is no one-to-one mapping between event" + " variable and population at risk variable!" 
+ ) + for ei, bi in zip(e, b): ename = ei bname = bi ei = df[ename] bi = df[bname] - outcol = '_'.join(('-'.join((ename, bname)), cls.__name__.lower())) + outcol = "_".join(("-".join((ename, bname)), cls.__name__.lower())) df[outcol] = cls(ei, bi, **kwargs).r + class Excess_Risk(_Smoother): """Excess Risk @@ -649,8 +700,8 @@ class Excess_Risk(_Smoother): >>> import libpysal >>> stl = libpysal.io.open(libpysal.examples.get_path('stl_hom.csv'), 'r') - The 11th and 14th columns in stl_hom.csv includes the number of homocides and population. - Creating two arrays from these columns. + The 11th and 14th columns in stl_hom.csv includes the number + of homocides and population. Creating two arrays from these columns. >>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13]) @@ -658,7 +709,8 @@ class Excess_Risk(_Smoother): >>> er = Excess_Risk(stl_e, stl_b) - Extracting the excess risk values through the property r of the Excess_Risk instance, er + Extracting the excess risk values through + the property r of the Excess_Risk instance, er >>> er.r[:10] array([[0.20665681], @@ -673,9 +725,10 @@ class Excess_Risk(_Smoother): [0.25821905]]) """ + def __init__(self, e, b): - e = np.asarray(e).reshape(-1,1) - b = np.asarray(b).reshape(-1,1) + e = np.asarray(e).reshape(-1, 1) + b = np.asarray(b).reshape(-1, 1) r_mean = e.sum() * 1.0 / b.sum() self.r = e * 1.0 / (b * r_mean) @@ -705,8 +758,8 @@ class Empirical_Bayes(_Smoother): >>> stl_ex = libpysal.examples.load_example('stl') >>> stl = libpysal.io.open(stl_ex.get_path('stl_hom.csv'), 'r') - The 11th and 14th columns in stl_hom.csv includes the number of homocides and population. - Creating two arrays from these columns. + The 11th and 14th columns in stl_hom.csv includes + the number of homocides and population. Creating two arrays from these columns. >>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13]) @@ -714,7 +767,8 @@ class Empirical_Bayes(_Smoother): >>> eb = Empirical_Bayes(stl_e, stl_b) - Extracting the risk values through the property r of the Empirical_Bayes instance, eb + Extracting the risk values through the property + r of the Empirical_Bayes instance, eb >>> eb.r[:10] array([[2.36718950e-05], @@ -729,9 +783,10 @@ class Empirical_Bayes(_Smoother): [3.02748380e-05]]) """ + def __init__(self, e, b): - e = np.asarray(e).reshape(-1,1) - b = np.asarray(b).reshape(-1,1) + e = np.asarray(e).reshape(-1, 1) + b = np.asarray(b).reshape(-1, 1) e_sum, b_sum = e.sum() * 1.0, b.sum() * 1.0 r_mean = e_sum / b_sum rate = e * 1.0 / b @@ -742,6 +797,7 @@ def __init__(self, e, b): weight = r_var / (r_var + r_mean / b) self.r = weight * rate + (1.0 - weight) * r_mean + class _Spatial_Smoother(_Smoother): """ This is a helper class that implements things that all the things that @@ -753,11 +809,12 @@ class _Spatial_Smoother(_Smoother): maybe headbanging triples), since they're literally only inits + one attribute. """ + def __init__(self): pass @classmethod - def by_col(cls, df, e,b, w=None, inplace=False, **kwargs): + def by_col(cls, df, e, b, w=None, inplace=False, **kwargs): """ Compute smoothing by columns in a dataframe. @@ -804,9 +861,11 @@ def by_col(cls, df, e,b, w=None, inplace=False, **kwargs): if isinstance(w, W): found = True if not found: - raise Exception('Weights not provided and no weights attached to frame!' - ' Please provide a weight or attach a weight to the' - ' dataframe') + raise Exception( + "Weights not provided and no weights attached to frame!" 
+ " Please provide a weight or attach a weight to the" + " dataframe" + ) if isinstance(w, W): w = [w] * len(e) if len(b) == 1 and len(e) > 1: @@ -814,16 +873,19 @@ def by_col(cls, df, e,b, w=None, inplace=False, **kwargs): try: assert len(e) == len(b) except AssertionError: - raise ValueError('There is no one-to-one mapping between event' - ' variable and population at risk variable!') + raise ValueError( + "There is no one-to-one mapping between event" + " variable and population at risk variable!" + ) for ei, bi, wi in zip(e, b, w): ename = ei bname = bi ei = df[ename] bi = df[bname] - outcol = '_'.join(('-'.join((ename, bname)), cls.__name__.lower())) + outcol = "_".join(("-".join((ename, bname)), cls.__name__.lower())) df[outcol] = cls(ei, bi, w=wi, **kwargs).r + class Spatial_Empirical_Bayes(_Spatial_Smoother): """Spatial Empirical Bayes Smoothing @@ -850,8 +912,8 @@ class Spatial_Empirical_Bayes(_Spatial_Smoother): >>> stl_ex = libpysal.examples.load_example('stl') >>> stl = libpysal.io.open(stl_ex.get_path('stl_hom.csv'), 'r') - The 11th and 14th columns in stl_hom.csv includes the number of homocides and population. - Creating two arrays from these columns. + The 11th and 14th columns in stl_hom.csv includes the number + of homocides and population. Creating two arrays from these columns. >>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13]) @@ -860,11 +922,13 @@ class Spatial_Empirical_Bayes(_Spatial_Smoother): >>> stl_w = libpysal.io.open(stl_ex.get_path('stl.gal'), 'r').read() Ensuring that the elements in the spatial weights instance are ordered - by the given sequential numbers from 1 to the number of observations in stl_hom.csv + by the given sequential numbers from 1 to the number of observations + in stl_hom.csv >>> if not stl_w.id_order_set: stl_w.id_order = range(1,len(stl) + 1) - Creating an instance of Spatial_Empirical_Bayes class using stl_e, stl_b, and stl_w + Creating an instance of Spatial_Empirical_Bayes class + using stl_e, stl_b, and stl_w >>> from esda.smoothing import Spatial_Empirical_Bayes >>> s_eb = Spatial_Empirical_Bayes(stl_e, stl_b, stl_w) @@ -883,14 +947,18 @@ class Spatial_Empirical_Bayes(_Spatial_Smoother): [3.73034109e-05], [3.47270722e-05]]) """ + def __init__(self, e, b, w): if not w.id_order_set: - raise ValueError("w id_order must be set to align with the order of e an b") - e = np.asarray(e).reshape(-1,1) - b = np.asarray(b).reshape(-1,1) + raise ValueError( + "The `id_order` of `w` must be set to " + "align with the order of `e` and `b`" + ) + e = np.asarray(e).reshape(-1, 1) + b = np.asarray(b).reshape(-1, 1) r_mean = Spatial_Rate(e, b, w).r rate = e * 1.0 / b - r_var_left = np.ones_like(e) * 1. + r_var_left = np.ones_like(e) * 1.0 ngh_num = np.ones_like(e) bi = slag(w, b) + b for i, idv in enumerate(w.id_order): @@ -905,6 +973,7 @@ def __init__(self, e, b, w): r_var[r_var < 0] = 0.0 self.r = r_mean + (rate - r_mean) * (r_var / (r_var + (r_mean / b))) + class Spatial_Rate(_Spatial_Smoother): """Spatial Rate Smoothing @@ -931,8 +1000,8 @@ class Spatial_Rate(_Spatial_Smoother): >>> stl_ex = libpysal.examples.load_example('stl') >>> stl = libpysal.io.open(stl_ex.get_path('stl_hom.csv'), 'r') - The 11th and 14th columns in stl_hom.csv includes the number of homocides and population. - Creating two arrays from these columns. + The 11th and 14th columns in stl_hom.csv includes the + number of homocides and population. Creating two arrays from these columns. 
>>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13]) @@ -941,7 +1010,8 @@ class Spatial_Rate(_Spatial_Smoother): >>> stl_w = libpysal.io.open(stl_ex.get_path('stl.gal'), 'r').read() Ensuring that the elements in the spatial weights instance are ordered - by the given sequential numbers from 1 to the number of observations in stl_hom.csv + by the given sequential numbers from 1 to the number + of observations in stl_hom.csv >>> if not stl_w.id_order_set: stl_w.id_order = range(1,len(stl) + 1) @@ -964,16 +1034,19 @@ class Spatial_Rate(_Spatial_Smoother): [4.26204928e-05], [3.47270722e-05]]) """ + def __init__(self, e, b, w): if not w.id_order_set: - raise ValueError("w id_order must be set to align with the order of e and b") + raise ValueError( + "w id_order must be set to align with the order of e and b" + ) else: - e = np.asarray(e).reshape(-1,1) - b = np.asarray(b).reshape(-1,1) - w.transform = 'b' + e = np.asarray(e).reshape(-1, 1) + b = np.asarray(b).reshape(-1, 1) + w.transform = "b" w_e, w_b = slag(w, e), slag(w, b) self.r = (e + w_e) / (b + w_b) - w.transform = 'o' + w.transform = "o" class Kernel_Smoother(_Spatial_Smoother): @@ -1020,7 +1093,8 @@ class Kernel_Smoother(_Spatial_Smoother): >>> kr = Kernel_Smoother(e, b, kw) - Extracting the smoothed rates through the property r of the Kernel_Smoother instance + Extracting the smoothed rates through the + property r of the Kernel_Smoother instance >>> kr.r array([[0.10543301], @@ -1030,14 +1104,17 @@ class Kernel_Smoother(_Spatial_Smoother): [0.04756872], [0.04845298]]) """ + def __init__(self, e, b, w): if type(w) != Kernel: - raise Error('w must be an instance of Kernel weights') + raise ValueError("w must be an instance of Kernel weights") if not w.id_order_set: - raise ValueError("w id_order must be set to align with the order of e and b") + raise ValueError( + "w id_order must be set to align with the order of e and b" + ) else: - e = np.asarray(e).reshape(-1,1) - b = np.asarray(b).reshape(-1,1) + e = np.asarray(e).reshape(-1, 1) + b = np.asarray(b).reshape(-1, 1) w_e, w_b = slag(w, e), slag(w, b) self.r = w_e / w_b @@ -1047,22 +1124,23 @@ class Age_Adjusted_Smoother(_Spatial_Smoother): Parameters ---------- - e : array (n*h, 1) - event variable measured for each age group across n spatial units - b : array (n*h, 1) - population at risk variable measured for each age group across n spatial units - w : spatial weights instance - s : array (n*h, 1) - standard population for each age group across n spatial units + e : array (n*h, 1) + event variable measured for each age group across n spatial units + b : array (n*h, 1) + population at risk variable measured for each age group across n spatial units + w : spatial weights instance + s : array (n*h, 1) + standard population for each age group across n spatial units Attributes ---------- - r : array (n, 1) - rate values from spatial rate smoothing + r : array (n, 1) + rate values from spatial rate smoothing Notes ----- - Weights used to smooth age-specific events and populations are simple binary weights + Weights used to smooth age-specific events + and populations are simple binary weights Examples -------- @@ -1097,34 +1175,36 @@ class Age_Adjusted_Smoother(_Spatial_Smoother): >>> ar = Age_Adjusted_Smoother(e, b, kw, s) - Extracting the smoothed rates through the property r of the Age_Adjusted_Smoother instance + Extracting the smoothed rates through the property r of + the Age_Adjusted_Smoother instance >>> ar.r array([0.10519625, 0.08494318, 0.06440072, 0.06898604, 
0.06952076, 0.05020968]) """ + def __init__(self, e, b, w, s, alpha=0.05): e = np.asarray(e).reshape(-1, 1) b = np.asarray(b).reshape(-1, 1) s = np.asarray(s).flatten() t = len(e) h = t // w.n - w.transform = 'b' + w.transform = "b" e_n, b_n = [], [] for i in range(h): e_n.append(slag(w, e[i::h]).tolist()) b_n.append(slag(w, b[i::h]).tolist()) - e_n = np.array(e_n).reshape((1, t), order='F')[0] - b_n = np.array(b_n).reshape((1, t), order='F')[0] + e_n = np.array(e_n).reshape((1, t), order="F")[0] + b_n = np.array(b_n).reshape((1, t), order="F")[0] e_n = e_n.reshape(s.shape) b_n = b_n.reshape(s.shape) r = direct_age_standardization(e_n, b_n, s, w.n, alpha=alpha) self.r = np.array([i[0] for i in r]) - w.transform = 'o' + w.transform = "o" - @_requires('pandas') + @_requires("pandas") @classmethod - def by_col(cls, df, e,b, w=None, s=None, **kwargs): + def by_col(cls, df, e, b, w=None, s=None, **kwargs): """ Compute smoothing by columns in a dataframe. @@ -1162,6 +1242,7 @@ def by_col(cls, df, e,b, w=None, s=None, **kwargs): if s is None: raise Exception('Standard population variable "s" must be supplied.') import pandas as pd + if isinstance(e, str): e = [e] if isinstance(b, str): @@ -1176,14 +1257,17 @@ def by_col(cls, df, e,b, w=None, s=None, **kwargs): found = True break if not found: - raise Exception('Weights not provided and no weights attached to frame!' - ' Please provide a weight or attach a weight to the' - ' dataframe.') + raise Exception( + "Weights not provided and no weights attached to frame!" + " Please provide a weight or attach a weight to the" + " dataframe." + ) if isinstance(w, W): w = [w] * len(e) if not all(isinstance(wi, W) for wi in w): - raise Exception('Weights object must be an instance of ' - ' libpysal.weights.W!') + raise Exception( + "Weights object must be an instance of " " libpysal.weights.W!" + ) b = b * len(e) if len(b) == 1 and len(e) > 1 else b s = s * len(e) if len(s) == 1 and len(e) > 1 else s try: @@ -1191,17 +1275,18 @@ def by_col(cls, df, e,b, w=None, s=None, **kwargs): assert len(e) == len(s) assert len(e) == len(w) except AssertionError: - raise ValueError('There is no one-to-one mapping between event' - ' variable and population at risk variable, and ' - ' standard population variable, and spatial ' - ' weights!') + raise ValueError( + "There is no one-to-one mapping between event" + " variable and population at risk variable, and " + " standard population variable, and spatial " + " weights!" + ) rdf = [] max_len = 0 for ei, bi, wi, si in zip(e, b, w, s): ename = ei bname = bi - h = len(ei) // wi.n - outcol = '_'.join(('-'.join((ename, bname)), cls.__name__.lower())) + outcol = "_".join(("-".join((ename, bname)), cls.__name__.lower())) this_r = cls(df[ei], df[bi], w=wi, s=df[si], **kwargs).r max_len = 0 if len(this_r) > max_len else max_len rdf.append((outcol, this_r.tolist())) @@ -1238,8 +1323,8 @@ class Disk_Smoother(_Spatial_Smoother): >>> stl_ex = libpysal.examples.load_example('stl') >>> stl = libpysal.io.open(stl_ex.get_path('stl_hom.csv'), 'r') - The 11th and 14th columns in stl_hom.csv includes the number of homocides and population. - Creating two arrays from these columns. + The 11th and 14th columns in stl_hom.csv includes the + number of homocides and population. Creating two arrays from these columns. 
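Several of the spatial smoothers in this module differ only in how they pool information over a neighbourhood: `Spatial_Rate` pools events and populations before taking a ratio, while `Disk_Smoother` averages the neighbours' crude rates. A toy comparison on a three-unit chain, with plain NumPy standing in for a `libpysal` weights object (illustrative data only):

```python
import numpy as np

# Binary adjacency for a three-unit chain 0-1-2; the spatial lag is just A @ x.
A = np.array([[0, 1, 0],
              [1, 0, 1],
              [0, 1, 0]], dtype=float)
e = np.array([5.0, 1.0, 20.0])          # events
b = np.array([1000.0, 200.0, 4000.0])   # population at risk

# Spatial_Rate: pool events and populations over each unit plus its neighbours,
# then take the ratio of the pooled totals.
spatial_rate = (e + A @ e) / (b + A @ b)

# Disk_Smoother: average the neighbours' crude rates instead.
crude = e / b
disk = (A @ crude) / A.sum(axis=1)

print(spatial_rate)
print(disk)
```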
>>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13]) @@ -1248,7 +1333,8 @@ class Disk_Smoother(_Spatial_Smoother): >>> stl_w = libpysal.io.open(stl_ex.get_path('stl.gal'), 'r').read() Ensuring that the elements in the spatial weights instance are ordered - by the given sequential numbers from 1 to the number of observations in stl_hom.csv + by the given sequential numbers from 1 to the number of + observations in stl_hom.csv >>> if not stl_w.id_order_set: stl_w.id_order = range(1,len(stl) + 1) @@ -1274,15 +1360,17 @@ class Disk_Smoother(_Spatial_Smoother): def __init__(self, e, b, w): if not w.id_order_set: - raise ValueError("w id_order must be set to align with the order of e and b") + raise ValueError( + "w id_order must be set to align with the order of e and b" + ) else: - e = np.asarray(e).reshape(-1,1) - b = np.asarray(b).reshape(-1,1) + e = np.asarray(e).reshape(-1, 1) + b = np.asarray(b).reshape(-1, 1) r = e * 1.0 / b weight_sum = [] for i in w.id_order: weight_sum.append(sum(w.weights[i])) - self.r = slag(w, r) / np.array(weight_sum).reshape(-1,1) + self.r = slag(w, r) / np.array(weight_sum).reshape(-1, 1) class Spatial_Median_Rate(_Spatial_Smoother): @@ -1318,8 +1406,8 @@ class Spatial_Median_Rate(_Spatial_Smoother): >>> stl_ex = libpysal.examples.load_example('stl') >>> stl = libpysal.io.open(stl_ex.get_path('stl_hom.csv'), 'r') - The 11th and 14th columns in stl_hom.csv includes the number of homocides and population. - Creating two arrays from these columns. + The 11th and 14th columns in stl_hom.csv includes the number of + homocides and population. Creating two arrays from these columns. >>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13]) @@ -1328,7 +1416,8 @@ class Spatial_Median_Rate(_Spatial_Smoother): >>> stl_w = libpysal.io.open(stl_ex.get_path('stl.gal'), 'r').read() Ensuring that the elements in the spatial weights instance are ordered - by the given sequential numbers from 1 to the number of observations in stl_hom.csv + by the given sequential numbers from 1 to the number of + observations in stl_hom.csv >>> if not stl_w.id_order_set: stl_w.id_order = range(1,len(stl) + 1) @@ -1336,7 +1425,8 @@ class Spatial_Median_Rate(_Spatial_Smoother): >>> smr0 = Spatial_Median_Rate(stl_e,stl_b,stl_w) - Extracting the computed rates through the property r of the Spatial_Median_Rate instance + Extracting the computed rates through the property + r of the Spatial_Median_Rate instance >>> smr0.r[:10] array([3.96047383e-05, 3.55386859e-05, 3.28308921e-05, 4.30731238e-05, @@ -1347,7 +1437,8 @@ class Spatial_Median_Rate(_Spatial_Smoother): >>> smr1 = Spatial_Median_Rate(stl_e,stl_b,stl_w,iteration=5) - Extracting the computed rates through the property r of the Spatial_Median_Rate instance + Extracting the computed rates through the property r + of the Spatial_Median_Rate instance >>> smr1.r[:10] array([3.11293620e-05, 2.95956330e-05, 3.11293620e-05, 3.10159267e-05, @@ -1359,7 +1450,8 @@ class Spatial_Median_Rate(_Spatial_Smoother): >>> smr2 = Spatial_Median_Rate(stl_e,stl_b,stl_w,aw=stl_b) - Extracting the computed rates through the property r of the Spatial_Median_Rate instance + Extracting the computed rates through the property + r of the Spatial_Median_Rate instance >>> smr2.r[:10] array([5.77412020e-05, 4.46449551e-05, 5.77412020e-05, 5.77412020e-05, @@ -1371,7 +1463,8 @@ class Spatial_Median_Rate(_Spatial_Smoother): >>> smr3 = Spatial_Median_Rate(stl_e,stl_b,stl_w,aw=stl_b,iteration=5) - Extracting the computed rates through the property r of the 
Spatial_Median_Rate instance + Extracting the computed rates through the property + r of the Spatial_Median_Rate instance >>> smr3.r[:10] array([3.61363528e-05, 4.46449551e-05, 3.61363528e-05, 3.61363528e-05, @@ -1379,9 +1472,12 @@ class Spatial_Median_Rate(_Spatial_Smoother): 3.61363528e-05, 4.46449551e-05]) >>> """ + def __init__(self, e, b, w, aw=None, iteration=1): if not w.id_order_set: - raise ValueError("w id_order must be set to align with the order of e and b") + raise ValueError( + "w id_order must be set to align with the order of e and b" + ) e = np.asarray(e).flatten() b = np.asarray(b).flatten() self.r = e * 1.0 / b @@ -1464,8 +1560,8 @@ class Spatial_Filtering(_Smoother): >>> bbox = [[-92.700676, 36.881809], [-87.916573, 40.3295669]] - The 11th and 14th columns in stl_hom.csv includes the number of homocides and population. - Creating two arrays from these columns. + The 11th and 14th columns in stl_hom.csv includes the number + of homocides and population. Creating two arrays from these columns. >>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13]) @@ -1474,7 +1570,8 @@ class Spatial_Filtering(_Smoother): >>> sf_0 = Spatial_Filtering(bbox,d,stl_e,stl_b,10,10,r=2) - Extracting the resulting rates through the property r of the Spatial_Filtering instance + Extracting the resulting rates through the property + r of the Spatial_Filtering instance >>> sf_0.r[:10] array([4.23561763e-05, 4.45290850e-05, 4.56456221e-05, 4.49133384e-05, @@ -1491,7 +1588,8 @@ class Spatial_Filtering(_Smoother): >>> sf.r.shape (100,) - Extracting the resulting rates through the property r of the Spatial_Filtering instance + Extracting the resulting rates through the property + r of the Spatial_Filtering instance >>> sf.r[:10] array([3.73728738e-05, 4.04456300e-05, 4.04456300e-05, 3.81035327e-05, @@ -1500,13 +1598,15 @@ class Spatial_Filtering(_Smoother): """ def __init__(self, bbox, data, e, b, x_grid, y_grid, r=None, pop=None): - e= np.asarray(e).reshape(-1,1) - b= np.asarray(b).reshape(-1,1) + e = np.asarray(e).reshape(-1, 1) + b = np.asarray(b).reshape(-1, 1) data_tree = KDTree(data) x_range = bbox[1][0] - bbox[0][0] y_range = bbox[1][1] - bbox[0][1] - x, y = np.mgrid[bbox[0][0]:bbox[1][0]:float(x_range) / x_grid, - bbox[0][1]:bbox[1][1]:float(y_range) / y_grid] + x, y = np.mgrid[ + bbox[0][0] : bbox[1][0] : float(x_range) / x_grid, # noqa E203 + bbox[0][1] : bbox[1][1] : float(y_range) / y_grid, # noqa E203 + ] self.grid = list(zip(x.ravel(), y.ravel())) self.r = [] if r is None and pop is None: @@ -1528,9 +1628,9 @@ def __init__(self, bbox, data, e, b, x_grid, y_grid, r=None, pop=None): self.r.append(e_n_f[-1] * 1.0 / b_n_f[-1]) self.r = np.array(self.r) - @_requires('pandas') + @_requires("pandas") @classmethod - def by_col(cls, df, e, b, x_grid, y_grid, geom_col='geometry', **kwargs): + def by_col(cls, df, e, b, x_grid, y_grid, geom_col="geometry", **kwargs): """ Compute smoothing by columns in a dataframe. The bounding box and point information is computed from the geometry column. @@ -1562,6 +1662,7 @@ def by_col(cls, df, e, b, x_grid, y_grid, geom_col='geometry', **kwargs): cells. 
""" import pandas as pd + # prep for application over multiple event/population pairs if isinstance(e, str): e = [e] @@ -1580,16 +1681,18 @@ def by_col(cls, df, e, b, x_grid, y_grid, geom_col='geometry', **kwargs): res = [] for ename, bname, xgi, ygi in zip(e, b, x_grid, y_grid): r = cls(bbox, data, df[ename], df[bname], xgi, ygi, **kwargs) - grid = np.asarray(r.grid).reshape(-1,2) - name = '_'.join(('-'.join((ename, bname)), cls.__name__.lower())) - colnames = ('_'.join((name, suffix)) for suffix in ['X', 'Y', 'R']) - items = [(name, col) for name,col in zip(colnames, [grid[:,0], - grid[:,1], - r.r])] + grid = np.asarray(r.grid).reshape(-1, 2) + name = "_".join(("-".join((ename, bname)), cls.__name__.lower())) + colnames = ("_".join((name, suffix)) for suffix in ["X", "Y", "R"]) + items = [ + (name, col) + for name, col in zip(colnames, [grid[:, 0], grid[:, 1], r.r]) + ] res.append(pd.DataFrame.from_dict(dict(items))) outdf = pd.concat(res) return outdf + class Headbanging_Triples(object): """Generate a pseudo spatial weights instance that contains headbanging triples @@ -1614,7 +1717,8 @@ class Headbanging_Triples(object): key is observation record id, value is a list of the following: tuple of original triple observations distance between original triple observations - distance between an original triple observation and its extrapolated point + distance between an original triple + observation and its extrapolated point Examples -------- @@ -1626,7 +1730,9 @@ class Headbanging_Triples(object): Reading data in stl_hom.csv into stl_db to extract values for event and population-at-risk variables - >>> stl_db = libpysal.io.open(libpysal.examples.get_path('stl_hom.csv'),'r') # doctest: +SKIP + >>> stl_db = libpysal.io.open( + ... libpysal.examples.get_path('stl_hom.csv'),'r' + ... ) # doctest: +SKIP Reading the stl data in the WKT format so that we can easily extract polygon centroids @@ -1665,7 +1771,9 @@ class Headbanging_Triples(object): Opening sids2.shp file >>> import libpysal - >>> sids = libpysal.io.open(libpysal.examples.get_path('sids2.shp'),'r') # doctest: +SKIP + >>> sids = libpysal.io.open( + ... libpysal.examples.get_path('sids2.shp'),'r' + ... ) # doctest: +SKIP Extracting the centroids of polygons in the sids data @@ -1720,17 +1828,20 @@ class Headbanging_Triples(object): >>> round(extrapolated[1],5), round(extrapolated[2],6) # doctest: +SKIP (0.33753, 0.302707) """ + def __init__(self, data, w, k=5, t=3, angle=135.0, edgecor=False): - raise DeprecationWarning('Deprecated') + raise DeprecationWarning("Deprecated") if k < 3: - raise ValueError("w should be NeareastNeighbors instance & the number of neighbors should be more than 3.") + raise ValueError( + "`w` should be a `NeareastNeighbors` instance & the number " + "of neighbors should be more than 3." 
+ ) if not w.id_order_set: raise ValueError("w id_order must be set to align with the order of data") self.triples, points = {}, {} for i, pnt in enumerate(data): ng = w.neighbor_offsets[i] - points[(i, Point(pnt))] = dict(list(zip(ng, [Point(d) - for d in data[ng]]))) + points[(i, Point(pnt))] = dict(list(zip(ng, [Point(d) for d in data[ng]]))) for i, pnt in list(points.keys()): ng = points[(i, pnt)] tr, tr_dis = {}, [] @@ -1742,8 +1853,8 @@ def __init__(self, data, w, k=5, t=3, angle=135.0, edgecor=False): if len(tr) > t: for c in list(tr.keys()): p2, p3 = tr[c] - tr_dis.append((get_segment_point_dist( - LineSegment(p2, p3), pnt), c)) + _seg = LineSegment(p2, p3) + tr_dis.append((get_segment_point_dist(_seg, pnt), c)) tr_dis = sorted(tr_dis)[:t] self.triples[i] = [trp for dis, trp in tr_dis] else: @@ -1768,15 +1879,27 @@ def __init__(self, data, w, k=5, t=3, angle=135.0, edgecor=False): dist_p_p2 = get_points_dist(point, p2) dist_p_p3 = get_points_dist(point, p3) if dist_p_p2 <= dist_p_p3: - ray1, ray2, s_pnt, dist, c = Ray(p2, point), Ray(p2, p3), p2, dist_p_p2, (ps[p2], ps[p3]) + ray1, ray2, s_pnt, dist, c = ( + Ray(p2, point), + Ray(p2, p3), + p2, + dist_p_p2, + (ps[p2], ps[p3]), + ) else: - ray1, ray2, s_pnt, dist, c = Ray(p3, point), Ray(p3, p2), p3, dist_p_p3, (ps[p3], ps[p2]) + ray1, ray2, s_pnt, dist, c = ( + Ray(p3, point), + Ray(p3, p2), + p3, + dist_p_p3, + (ps[p3], ps[p2]), + ) ang = get_angle_between(ray1, ray2) - if ang >= 90 + angle / 2 or (ang < 0 and ang + 360 >= 90 + angle / 2): - ex_point = get_point_at_angle_and_dist( - ray1, angle, dist) - extra = [c, dist_p2_p3, get_points_dist( - s_pnt, ex_point)] + if ang >= 90 + angle / 2 or ( + ang < 0 and ang + 360 >= 90 + angle / 2 + ): + ex_point = get_point_at_angle_and_dist(ray1, angle, dist) + extra = [c, dist_p2_p3, get_points_dist(s_pnt, ex_point)] break self.triples[ps[point]].append(extra[0]) self.extra[ps[point]] = extra @@ -1805,109 +1928,110 @@ class Headbanging_Median_Rate(object): Examples -------- - >>> import libpysal # doctest: +SKIP - + >>> import libpysal # doctest: +SKIP opening the sids2 shapefile - >>> sids = libpysal.io.open(libpysal.examples.get_path('sids2.shp'), 'r') # doctest: +SKIP - + >>> sids = libpysal.io.open( + ... libpysal.examples.get_path('sids2.shp'), 'r' + ... ) # doctest: +SKIP extracting the centroids of polygons in the sids2 data - >>> sids_d = np.array([i.centroid for i in sids]) # doctest: +SKIP - + >>> sids_d = np.array([i.centroid for i in sids]) # doctest: +SKIP creating a 5-nearest neighbors weights from the centroids - >>> sids_w = libpysal.weights.KNN(sids_d,k=5) # doctest: +SKIP - + >>> sids_w = libpysal.weights.KNN(sids_d,k=5) # doctest: +SKIP ensuring that the members in sids_w are ordered - >>> if not sids_w.id_order_set: sids_w.id_order = sids_w.id_order # doctest: +SKIP - + >>> if not sids_w.id_order_set: sids_w.id_order = sids_w.id_order # doctest: +SKIP finding headbanging triples by using 5 neighbors return outdf - >>> s_ht = Headbanging_Triples(sids_d,sids_w,k=5) # doctest: +SKIP + >>> s_ht = Headbanging_Triples(sids_d,sids_w,k=5) # doctest: +SKIP DeprecationWarning: Deprecated - reading in the sids2 data table - >>> sids_db = libpysal.io.open(libpysal.examples.get_path('sids2.dbf'), 'r') # doctest: +SKIP - + >>> sids_db = libpysal.io.open( + ... libpysal.examples.get_path('sids2.dbf'), 'r' + ... 
) # doctest: +SKIP extracting the 10th and 9th columns in the sids2.dbf and using data values as event and population-at-risk variables - >>> s_e, s_b = np.array(sids_db[:,9]), np.array(sids_db[:,8]) # doctest: +SKIP - + >>> s_e, s_b = np.array(sids_db[:,9]), np.array(sids_db[:,8]) # doctest: +SKIP computing headbanging median rates from s_e, s_b, and s_ht - >>> sids_hb_r = Headbanging_Median_Rate(s_e,s_b,s_ht) # doctest: +SKIP - + >>> sids_hb_r = Headbanging_Median_Rate(s_e,s_b,s_ht) # doctest: +SKIP - extracting the computed rates through the property r of the Headbanging_Median_Rate instance + extracting the computed rates through the property r of the + Headbanging_Median_Rate instance - >>> sids_hb_r.r[:5] # doctest: +SKIP + >>> sids_hb_r.r[:5] # doctest: +SKIP array([ 0.00075586, 0. , 0.0008285 , 0.0018315 , 0.00498891]) recomputing headbanging median rates with 5 iterations - >>> sids_hb_r2 = Headbanging_Median_Rate(s_e,s_b,s_ht,iteration=5) # doctest: +SKIP + >>> sids_hb_r2 = Headbanging_Median_Rate( + ... s_e,s_b,s_ht,iteration=5 + ... ) # doctest: +SKIP + extracting the computed rates through the property r of the + Headbanging_Median_Rate instance - extracting the computed rates through the property r of the Headbanging_Median_Rate instance - - >>> sids_hb_r2.r[:5] # doctest: +SKIP + >>> sids_hb_r2.r[:5] # doctest: +SKIP array([ 0.0008285 , 0.00084331, 0.00086896, 0.0018315 , 0.00498891]) recomputing headbanging median rates by considring a set of auxilliary weights - >>> sids_hb_r3 = Headbanging_Median_Rate(s_e,s_b,s_ht,aw=s_b) # doctest: +SKIP - + >>> sids_hb_r3 = Headbanging_Median_Rate(s_e,s_b,s_ht,aw=s_b) # doctest: +SKIP - extracting the computed rates through the property r of the Headbanging_Median_Rate instance + extracting the computed rates through the property r of the + Headbanging_Median_Rate instance - >>> sids_hb_r3.r[:5] # doctest: +SKIP + >>> sids_hb_r3.r[:5] # doctest: +SKIP array([ 0.00091659, 0. , 0.00156838, 0.0018315 , 0.00498891]) """ + def __init__(self, e, b, t, aw=None, iteration=1): - raise DeprecationWarning('Deprecated') + raise DeprecationWarning("Deprecated") self.r = e * 1.0 / b self.tr, self.aw = t.triples, aw - if hasattr(t, 'extra'): + if hasattr(t, "extra"): self.extra = t.extra while iteration: self.__search_headbanging_median() iteration -= 1 def __get_screens(self, id, triples, weighted=False): - r, tr = self.r, self.tr + r = self.r if len(triples) == 0: return r[id] - if hasattr(self, 'extra') and id in self.extra: + if hasattr(self, "extra") and id in self.extra: extra = self.extra trp_r = r[list(triples[0])] # observed rate # plus difference in rate scaled by ratio of extrapolated distance # & observed distance. 
trp_r[-1] = trp_r[0] + (trp_r[0] - trp_r[-1]) * ( - extra[id][-1] * 1.0 / extra[id][1]) + extra[id][-1] * 1.0 / extra[id][1] + ) trp_r = sorted(trp_r) if not weighted: return r[id], trp_r[0], trp_r[-1] else: trp_aw = self.aw[triples[0]] - extra_w = trp_aw[0] + (trp_aw[0] - trp_aw[- - 1]) * (extra[id][-1] * 1.0 / extra[id][1]) + extra_w = trp_aw[0] + (trp_aw[0] - trp_aw[-1]) * ( + extra[id][-1] * 1.0 / extra[id][1] + ) return r[id], trp_r[0], trp_r[-1], self.aw[id], trp_aw[0] + extra_w if not weighted: lowest, highest = [], [] @@ -1921,19 +2045,23 @@ def __get_screens(self, id, triples, weighted=False): lowest_aw, highest_aw = [], [] for trp in triples: trp_r = r[list(trp)] - dtype = [('r', '%s' % trp_r.dtype), ('w', - '%s' % self.aw.dtype)] + dtype = [("r", "%s" % trp_r.dtype), ("w", "%s" % self.aw.dtype)] trp_r = np.array(list(zip(trp_r, list(trp))), dtype=dtype) - trp_r.sort(order='r') - lowest.append(trp_r['r'][0]) - highest.append(trp_r['r'][-1]) - lowest_aw.append(self.aw[int(round(trp_r['w'][0]))]) - highest_aw.append(self.aw[int(round(trp_r['w'][-1]))]) + trp_r.sort(order="r") + lowest.append(trp_r["r"][0]) + highest.append(trp_r["r"][-1]) + lowest_aw.append(self.aw[int(round(trp_r["w"][0]))]) + highest_aw.append(self.aw[int(round(trp_r["w"][-1]))]) wm_lowest = weighted_median(np.array(lowest), np.array(lowest_aw)) - wm_highest = weighted_median( - np.array(highest), np.array(highest_aw)) + wm_highest = weighted_median(np.array(highest), np.array(highest_aw)) triple_members = flatten(triples, unique=False) - return r[id], wm_lowest, wm_highest, self.aw[id] * len(triples), self.aw[triple_members].sum() + return ( + r[id], + wm_lowest, + wm_highest, + self.aw[id] * len(triples), + self.aw[triple_members].sum(), + ) def __get_median_from_screens(self, screens): if isinstance(screens, float): @@ -1952,17 +2080,16 @@ def __get_median_from_screens(self, screens): return rk def __search_headbanging_median(self): - r, tr = self.r, self.tr + tr = self.tr new_r = [] for k in list(tr.keys()): - screens = self.__get_screens( - k, tr[k], weighted=(self.aw is not None)) + screens = self.__get_screens(k, tr[k], weighted=(self.aw is not None)) new_r.append(self.__get_median_from_screens(screens)) self.r = np.array(new_r) - @_requires('pandas') + @_requires("pandas") @classmethod - def by_col(cls, df, e, b, t=None, geom_col='geometry', inplace=False, **kwargs): + def by_col(cls, df, e, b, t=None, geom_col="geometry", inplace=False, **kwargs): """ Compute smoothing by columns in a dataframe. The bounding box and point information is computed from the geometry column. @@ -1996,12 +2123,12 @@ def by_col(cls, df, e, b, t=None, geom_col='geometry', inplace=False, **kwargs): event/population pairs. If done inplace, there is no return value and `df` is modified in place. 
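`Headbanging_Median_Rate.__get_screens` reduces each observation's triples to a low screen and a high screen built from the lower and higher endpoint rates of every triple. The sketch below assumes the textbook headbanging rule for the final step (the observation's own rate is clamped between the two screens via a median), since the body of `__get_median_from_screens` is not shown in this hunk; the function name and data are illustrative only.

```python
import numpy as np

def headbang_once(r_focal, triple_rates):
    """One headbanging screening step for a single observation (illustrative).

    ``triple_rates`` holds the rates at the two ends of each triple. The
    low/high screens follow the unweighted branch of ``__get_screens``; the
    final median (clamping) rule is assumed from the textbook procedure.
    """
    lows = [min(a, b) for a, b in triple_rates]
    highs = [max(a, b) for a, b in triple_rates]
    low_screen = np.median(lows)
    high_screen = np.median(highs)
    return np.median([low_screen, r_focal, high_screen])

triples = [(0.8, 2.1), (1.0, 1.9), (0.7, 2.5)]  # endpoint rates of three triples
print(headbang_once(3.0, triples))  # 2.1: an outlying focal rate is pulled down to the high screen
```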
""" - import pandas as pd + if not inplace: new = df.copy() cls.by_col(new, e, b, t=t, geom_col=geom_col, inplace=True, **kwargs) return new - import pandas as pd + # prep for application over multiple event/population pairs if isinstance(e, str): e = [e] @@ -2012,10 +2139,10 @@ def by_col(cls, df, e, b, t=None, geom_col='geometry', inplace=False, **kwargs): data = get_points_array(df[geom_col]) - #Headbanging_Triples doesn't take **kwargs, so filter its arguments + # Headbanging_Triples doesn't take **kwargs, so filter its arguments # (self, data, w, k=5, t=3, angle=135.0, edgecor=False): - w = kwargs.pop('w', None) + w = kwargs.pop("w", None) if w is None: found = False for k in df._metadata: @@ -2023,20 +2150,20 @@ def by_col(cls, df, e, b, t=None, geom_col='geometry', inplace=False, **kwargs): if isinstance(w, W): found = True if not found: - raise Exception('Weights not provided and no weights attached to frame!' - ' Please provide a weight or attach a weight to the' - ' dataframe') + raise Exception( + "Weights not provided and no weights attached to frame!" + " Please provide a weight or attach a weight to the" + " dataframe" + ) - k = kwargs.pop('k', 5) - t = kwargs.pop('t', 3) - angle = kwargs.pop('angle', 135.0) - edgecor = kwargs.pop('edgecor', False) + k = kwargs.pop("k", 5) + t = kwargs.pop("t", 3) + angle = kwargs.pop("angle", 135.0) + edgecor = kwargs.pop("edgecor", False) - hbt = Headbanging_Triples(data, w, k=k, t=t, angle=angle, - edgecor=edgecor) + hbt = Headbanging_Triples(data, w, k=k, t=t, angle=angle, edgecor=edgecor) - res = [] for ename, bname in zip(e, b): r = cls(df[ename], df[bname], hbt, **kwargs).r - name = '_'.join(('-'.join((ename, bname)), cls.__name__.lower())) + name = "_".join(("-".join((ename, bname)), cls.__name__.lower())) df[name] = r diff --git a/esda/tabular.py b/esda/tabular.py index 3ba7231c..2dfdffa2 100644 --- a/esda/tabular.py +++ b/esda/tabular.py @@ -1,8 +1,10 @@ # from ...common import requires as _requires import itertools as _it + from libpysal.weights import W + # I would like to define it like this, so that you could make a call like: # Geary(df, 'HOVAL', 'INC', w=W), but this only works in Python3. So, I have to # use a workaround @@ -51,7 +53,7 @@ def _univariate_handler( **kwargs : optional keyword arguments options that are passed directly to the statistic """ - ### Preprocess + # Preprocess if not inplace: new_df = df.copy() _univariate_handler( @@ -77,7 +79,7 @@ def _univariate_handler( " Please provide a weight or attach a weight to the" " dataframe" ) - ### Prep indexes + # Prep indexes if outvals is None: outvals = [] outvals.insert(0, "_statistic") @@ -100,13 +102,13 @@ def _univariate_handler( if isinstance(cols, str): cols = [cols] - ### Make closure around weights & apply columnwise + # Make closure around weights & apply columnwise def column_stat(column): return stat(column.values, w=w, **kwargs) stat_objs = df[cols].apply(column_stat) - ### Assign into dataframe + # Assign into dataframe for col in cols: stat_obj = stat_objs[col] y = kwargs.get("y") diff --git a/esda/tests/test_adbscan.py b/esda/tests/test_adbscan.py index b2dafebb..8440012e 100644 --- a/esda/tests/test_adbscan.py +++ b/esda/tests/test_adbscan.py @@ -1,10 +1,11 @@ import unittest -import pandas -import numpy as np -from .. import adbscan +import numpy as np +import pandas import pytest +from .. 
import adbscan + class ADBSCAN_Tester(unittest.TestCase): def setUp(self): @@ -271,7 +272,13 @@ def test_get_cluster_boundary(self): # # Single Core # # ------------------------# polys = adbscan.get_cluster_boundary(self.labels, self.db, xy=["x", "y"]) - wkt = "POLYGON ((0.7217553174317995 0.8192869956700687, 0.7605307121989587 0.9086488808086682, 0.9177741225129434 0.8568503024577332, 0.8126209616521135 0.6262871483113925, 0.6125260668293881 0.5475861559192435, 0.5425443680112613 0.7546476915298572, 0.7217553174317995 0.8192869956700687))" + wkt = ( + "POLYGON ((0.7217553174317995 0.8192869956700687, 0.7605307121989587 " + "0.9086488808086682, 0.9177741225129434 0.8568503024577332, " + "0.8126209616521135 0.6262871483113925, 0.6125260668293881 " + "0.5475861559192435, 0.5425443680112613 0.7546476915298572, " + "0.7217553174317995 0.8192869956700687))" + ) self.assertEqual(polys[0].wkt, wkt) # ------------------------# @@ -280,7 +287,13 @@ def test_get_cluster_boundary(self): polys = adbscan.get_cluster_boundary( self.labels, self.db, xy=["x", "y"], n_jobs=-1 ) - wkt = "POLYGON ((0.7217553174317995 0.8192869956700687, 0.7605307121989587 0.9086488808086682, 0.9177741225129434 0.8568503024577332, 0.8126209616521135 0.6262871483113925, 0.6125260668293881 0.5475861559192435, 0.5425443680112613 0.7546476915298572, 0.7217553174317995 0.8192869956700687))" + wkt = ( + "POLYGON ((0.7217553174317995 0.8192869956700687, 0.7605307121989587 " + "0.9086488808086682, 0.9177741225129434 0.8568503024577332, " + "0.8126209616521135 0.6262871483113925, 0.6125260668293881 " + "0.5475861559192435, 0.5425443680112613 0.7546476915298572, " + "0.7217553174317995 0.8192869956700687))" + ) self.assertEqual(polys[0].wkt, wkt) diff --git a/esda/tests/test_gamma.py b/esda/tests/test_gamma.py index 1e84091b..5d9870ce 100644 --- a/esda/tests/test_gamma.py +++ b/esda/tests/test_gamma.py @@ -1,12 +1,17 @@ import unittest + import numpy as np +from libpysal.common import pandas from libpysal.weights.util import lat2W + from ..gamma import Gamma -from libpysal.common import pandas PANDAS_EXTINCT = pandas is None + + class Gamma_Tester(unittest.TestCase): """Unit test for Gamma Index""" + def setUp(self): self.w = lat2W(4, 4) self.y = np.ones(16) @@ -24,7 +29,7 @@ def test_Gamma(self): self.assertAlmostEqual(g.max_g, 20.0) self.assertAlmostEqual(g.mean_g, 11.093093093093094) np.random.seed(12345) - g1 = Gamma(self.y, self.w, operation='s') + g1 = Gamma(self.y, self.w, operation="s") self.assertAlmostEqual(g1.g, 8.0) self.assertAlmostEqual(g1.g_z, -3.7057554345954791) self.assertAlmostEqual(g1.p_sim_g, 0.001) @@ -32,7 +37,7 @@ def test_Gamma(self): self.assertAlmostEqual(g1.max_g, 48.0) self.assertAlmostEqual(g1.mean_g, 25.623623623623622) np.random.seed(12345) - g2 = Gamma(self.y, self.w, operation='a') + g2 = Gamma(self.y, self.w, operation="a") self.assertAlmostEqual(g2.g, 8.0) self.assertAlmostEqual(g2.g_z, -3.7057554345954791) self.assertAlmostEqual(g2.p_sim_g, 0.001) @@ -58,22 +63,23 @@ def func(z, i, j): self.assertAlmostEqual(g4.g_z, 3.1879280354548638) self.assertAlmostEqual(g4.p_sim_g, 0.0030000000000000001) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_by_col(self): import pandas as pd - df = pd.DataFrame(self.y, columns=['y']) - r1 = Gamma.by_col(df, ['y'], w=self.w) - self.assertIn('y_gamma', r1.columns) - self.assertIn('y_p_sim', r1.columns) + + df = pd.DataFrame(self.y, columns=["y"]) + r1 = Gamma.by_col(df, ["y"], w=self.w) + 
self.assertIn("y_gamma", r1.columns) + self.assertIn("y_p_sim", r1.columns) this_gamma = np.unique(r1.y_gamma.values) this_pval = np.unique(r1.y_p_sim.values) np.testing.assert_allclose(this_gamma, self.g.g) np.testing.assert_allclose(this_pval, self.g.p_sim) - Gamma.by_col(df, ['y'], inplace=True, operation='s', w=self.w) + Gamma.by_col(df, ["y"], inplace=True, operation="s", w=self.w) this_gamma = np.unique(df.y_gamma.values) this_pval = np.unique(df.y_p_sim.values) np.testing.assert_allclose(this_gamma, 8.0) - np.testing.assert_allclose(this_pval, .001) + np.testing.assert_allclose(this_pval, 0.001) suite = unittest.TestSuite() @@ -82,6 +88,6 @@ def test_by_col(self): a = unittest.TestLoader().loadTestsFromTestCase(i) suite.addTest(a) -if __name__ == '__main__': +if __name__ == "__main__": runner = unittest.TextTestRunner() runner.run(suite) diff --git a/esda/tests/test_geary.py b/esda/tests/test_geary.py index c421f081..18c467b2 100644 --- a/esda/tests/test_geary.py +++ b/esda/tests/test_geary.py @@ -1,14 +1,13 @@ """Geary Unittest.""" import unittest -from libpysal.io import open as popen +import numpy as np from libpysal import examples from libpysal.common import pandas -from libpysal.weights import W +from libpysal.io import open as popen from libpysal.weights import Rook from .. import geary -import numpy as np PANDAS_EXTINCT = pandas is None diff --git a/esda/tests/test_getisord.py b/esda/tests/test_getisord.py index 35ed3a42..35547215 100644 --- a/esda/tests/test_getisord.py +++ b/esda/tests/test_getisord.py @@ -1,9 +1,10 @@ import unittest + import numpy as np +from libpysal.common import ATOL, RTOL, pandas +from libpysal.weights.distance import DistanceBand from .. import getisord -from libpysal.weights.distance import DistanceBand -from libpysal.common import pandas, RTOL, ATOL POINTS = np.array([(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]) W = DistanceBand(POINTS, threshold=15) diff --git a/esda/tests/test_join_counts.py b/esda/tests/test_join_counts.py index 4bdbbd51..b3fda32d 100644 --- a/esda/tests/test_join_counts.py +++ b/esda/tests/test_join_counts.py @@ -1,14 +1,17 @@ import unittest + import numpy as np +from libpysal.common import pandas +from libpysal.weights.util import lat2W from ..join_counts import Join_Counts -from libpysal.weights.util import lat2W -from libpysal.common import pandas PANDAS_EXTINCT = pandas is None + class Join_Counts_Tester(unittest.TestCase): """Unit test for Join Counts""" + def setUp(self): self.w = lat2W(4, 4) self.y = np.ones(16) @@ -41,12 +44,15 @@ def test_Join_Counts(self): self.assertAlmostEqual(0.001, jc.p_sim_autocorr_pos) self.assertAlmostEqual(0.2653504320039377, jc.sim_autocorr_chi2) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_by_col(self): import pandas as pd - df = pd.DataFrame(self.y, columns=['y']) + + df = pd.DataFrame(self.y, columns=["y"]) np.random.seed(12345) - r1 = Join_Counts.by_col(df, ['y'], w=self.w, permutations=999) # outvals = ['bb', 'bw', 'ww', 'p_sim_bw', 'p_sim_bb'] + r1 = Join_Counts.by_col( + df, ["y"], w=self.w, permutations=999 + ) # outvals = ['bb', 'bw', 'ww', 'p_sim_bw', 'p_sim_bb'] bb = np.unique(r1.y_bb.values) bw = np.unique(r1.y_bw.values) @@ -59,12 +65,13 @@ def test_by_col(self): self.assertAlmostEqual(bb_p, c.p_sim_bb) self.assertAlmostEqual(bw_p, c.p_sim_bw) + suite = unittest.TestSuite() test_classes = [Join_Counts_Tester] for i in test_classes: a = unittest.TestLoader().loadTestsFromTestCase(i) 
suite.addTest(a) -if __name__ == '__main__': +if __name__ == "__main__": runner = unittest.TextTestRunner() runner.run(suite) diff --git a/esda/tests/test_ljc.py b/esda/tests/test_ljc.py index 217c13e1..14235421 100644 --- a/esda/tests/test_ljc.py +++ b/esda/tests/test_ljc.py @@ -1,31 +1,32 @@ # based off: https://github.com/pysal/esda/blob/master/esda/tests/test_join_counts.py import unittest + import numpy as np -from libpysal.weights.util import lat2W from libpysal.common import pandas +from libpysal.weights.util import lat2W from esda.join_counts_local import Join_Counts_Local PANDAS_EXTINCT = pandas is None + class Join_Counts_Locals_Tester(unittest.TestCase): """Unit test for Local Join Counts (univariate)""" + def setUp(self): self.w = lat2W(4, 4) self.y = np.ones(16) self.y[0:8] = 0 def test_Join_Counts_Locals(self): - """Test method""" - np.random.seed(12345) - ljc = Join_Counts_Local(connectivity=self.w).fit(self.y) - assert np.array_equal(ljc.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 3, 2, 2, 3, 3, 2]) - - + """Test method""" + np.random.seed(12345) + ljc = Join_Counts_Local(connectivity=self.w).fit(self.y) + assert np.array_equal(ljc.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 3, 2, 2, 3, 3, 2]) + + suite = unittest.TestSuite() -test_classes = [ - Join_Counts_Locals_Tester -] +test_classes = [Join_Counts_Locals_Tester] for i in test_classes: a = unittest.TestLoader().loadTestsFromTestCase(i) suite.addTest(a) diff --git a/esda/tests/test_ljc_bv.py b/esda/tests/test_ljc_bv.py index 8e9af20a..4caa8f8d 100644 --- a/esda/tests/test_ljc_bv.py +++ b/esda/tests/test_ljc_bv.py @@ -1,34 +1,43 @@ # based off: https://github.com/pysal/esda/blob/master/tests/test_join_counts.py import unittest + import numpy as np -from libpysal.weights.util import lat2W from libpysal.common import pandas +from libpysal.weights.util import lat2W from esda.join_counts_local_bv import Join_Counts_Local_BV PANDAS_EXTINCT = pandas is None + class Local_Join_Counts_BV_Tester(unittest.TestCase): """Unit test for Local Join Counts (univariate)""" + def setUp(self): self.w = lat2W(4, 4) self.x = np.ones(16) self.x[0:8] = 0 - self.z = [0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1] + self.z = [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1] def test_Local_Join_Counts_BV(self): - """Test method""" - np.random.seed(12345) - ljc_bv_case1 = Join_Counts_Local_BV(connectivity=self.w).fit(self.x, self.z, case="BJC") - ljc_bv_case2 = Join_Counts_Local_BV(connectivity=self.w).fit(self.x, self.z, case="CLC") - assert np.array_equal(ljc_bv_case1.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0]) - assert np.array_equal(ljc_bv_case2.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 2]) - + """Test method""" + np.random.seed(12345) + ljc_bv_case1 = Join_Counts_Local_BV(connectivity=self.w).fit( + self.x, self.z, case="BJC" + ) + ljc_bv_case2 = Join_Counts_Local_BV(connectivity=self.w).fit( + self.x, self.z, case="CLC" + ) + assert np.array_equal( + ljc_bv_case1.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0] + ) + assert np.array_equal( + ljc_bv_case2.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 2] + ) + suite = unittest.TestSuite() -test_classes = [ - Local_Join_Counts_BV_Tester -] +test_classes = [Local_Join_Counts_BV_Tester] for i in test_classes: a = unittest.TestLoader().loadTestsFromTestCase(i) suite.addTest(a) diff --git a/esda/tests/test_ljc_mv.py b/esda/tests/test_ljc_mv.py index 7a67b7a1..49095961 100644 --- a/esda/tests/test_ljc_mv.py +++ b/esda/tests/test_ljc_mv.py @@ -1,33 +1,36 @@ # based off: 
https://github.com/pysal/esda/blob/master/tests/test_join_counts.py import unittest + import numpy as np -from libpysal.weights.util import lat2W from libpysal.common import pandas +from libpysal.weights.util import lat2W from esda.join_counts_local_mv import Join_Counts_Local_MV PANDAS_EXTINCT = pandas is None + class Local_Join_Counts_MV_Tester(unittest.TestCase): """Unit test for Local Join Counts (univariate)""" + def setUp(self): self.w = lat2W(4, 4) self.x = np.ones(16) self.x[0:8] = 0 - self.y = [0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1] - self.z = [0,1,1,1,1,1,1,1,0,0,0,1,0,0,1,1] - + self.y = [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1] + self.z = [0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1] def test_Local_Join_Counts_MV(self): - """Test method""" - np.random.seed(12345) - ljc_mv = Join_Counts_Local_MV(connectivity=self.w).fit([self.x, self.y, self.z]) - assert np.array_equal(ljc_mv.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 2]) - + """Test method""" + np.random.seed(12345) + ljc_mv = Join_Counts_Local_MV(connectivity=self.w).fit([self.x, self.y, self.z]) + assert np.array_equal( + ljc_mv.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 2] + ) + + suite = unittest.TestSuite() -test_classes = [ - Local_Join_Counts_MV_Tester -] +test_classes = [Local_Join_Counts_MV_Tester] for i in test_classes: a = unittest.TestLoader().loadTestsFromTestCase(i) suite.addTest(a) diff --git a/esda/tests/test_local_geary.py b/esda/tests/test_local_geary.py index 42197f90..3cb97c10 100644 --- a/esda/tests/test_local_geary.py +++ b/esda/tests/test_local_geary.py @@ -1,27 +1,26 @@ import unittest + import libpysal -from libpysal.common import pandas, RTOL, ATOL -from esda.geary_local import Geary_Local import numpy as np -PANDAS_EXTINCT = pandas is None +from esda.geary_local import Geary_Local + class Geary_Local_Tester(unittest.TestCase): def setUp(self): np.random.seed(10) self.w = libpysal.io.open(libpysal.examples.get_path("stl.gal")).read() f = libpysal.io.open(libpysal.examples.get_path("stl_hom.txt")) - self.y = np.array(f.by_col['HR8893']) + self.y = np.array(f.by_col["HR8893"]) def test_local_geary(self): lG = Geary_Local(connectivity=self.w).fit(self.y) self.assertAlmostEqual(lG.localG[0], 0.696703432) self.assertAlmostEqual(lG.p_sim[0], 0.19) - + + suite = unittest.TestSuite() -test_classes = [ - Geary_Local_Tester -] +test_classes = [Geary_Local_Tester] for i in test_classes: a = unittest.TestLoader().loadTestsFromTestCase(i) suite.addTest(a) @@ -29,4 +28,3 @@ def test_local_geary(self): if __name__ == "__main__": runner = unittest.TextTestRunner() runner.run(suite) - diff --git a/esda/tests/test_local_geary_mv.py b/esda/tests/test_local_geary_mv.py index aec3dc00..21bb40a2 100644 --- a/esda/tests/test_local_geary_mv.py +++ b/esda/tests/test_local_geary_mv.py @@ -1,29 +1,28 @@ import unittest + import libpysal -from libpysal.common import pandas, RTOL, ATOL -from esda.geary_local_mv import Geary_Local_MV import numpy as np -PANDAS_EXTINCT = pandas is None +from esda.geary_local_mv import Geary_Local_MV + class Geary_Local_MV_Tester(unittest.TestCase): def setUp(self): np.random.seed(100) self.w = libpysal.io.open(libpysal.examples.get_path("stl.gal")).read() f = libpysal.io.open(libpysal.examples.get_path("stl_hom.txt")) - self.y1 = np.array(f.by_col['HR8893']) - self.y2 = np.array(f.by_col['HC8488']) + self.y1 = np.array(f.by_col["HR8893"]) + self.y2 = np.array(f.by_col["HC8488"]) def test_local_geary_mv(self): lG_mv = Geary_Local_MV(connectivity=self.w).fit([self.y1, self.y2]) 
print(lG_mv.p_sim[0]) self.assertAlmostEqual(lG_mv.localG[0], 0.4096931479581422) self.assertAlmostEqual(lG_mv.p_sim[0], 0.211) - + + suite = unittest.TestSuite() -test_classes = [ - Geary_Local_MV_Tester -] +test_classes = [Geary_Local_MV_Tester] for i in test_classes: a = unittest.TestLoader().loadTestsFromTestCase(i) suite.addTest(a) diff --git a/esda/tests/test_losh.py b/esda/tests/test_losh.py index 6cad83db..8ddc8606 100644 --- a/esda/tests/test_losh.py +++ b/esda/tests/test_losh.py @@ -1,28 +1,27 @@ # based off: https://github.com/pysal/esda/blob/master/tests/test_moran.py#L96 import unittest + import libpysal -from libpysal.common import pandas, RTOL, ATOL -from esda.losh import LOSH import numpy as np -PANDAS_EXTINCT = pandas is None +from esda.losh import LOSH + class Losh_Tester(unittest.TestCase): def setUp(self): np.random.seed(10) self.w = libpysal.io.open(libpysal.examples.get_path("stl.gal")).read() f = libpysal.io.open(libpysal.examples.get_path("stl_hom.txt")) - self.y = np.array(f.by_col['HR8893']) + self.y = np.array(f.by_col["HR8893"]) def test_losh(self): ls = LOSH(connectivity=self.w, inference="chi-square").fit(self.y) self.assertAlmostEqual(ls.Hi[0], 0.77613471) self.assertAlmostEqual(ls.pval[0], 0.22802201) - + + suite = unittest.TestSuite() -test_classes = [ - Losh_Tester -] +test_classes = [Losh_Tester] for i in test_classes: a = unittest.TestLoader().loadTestsFromTestCase(i) suite.addTest(a) @@ -30,4 +29,3 @@ def test_losh(self): if __name__ == "__main__": runner = unittest.TextTestRunner() runner.run(suite) - diff --git a/esda/tests/test_map_comparison.py b/esda/tests/test_map_comparison.py index afaeaa29..7964578e 100644 --- a/esda/tests/test_map_comparison.py +++ b/esda/tests/test_map_comparison.py @@ -1,4 +1,9 @@ -import numpy, pytest, geopandas, os +import os + +import geopandas +import numpy +import pytest + from .. import map_comparison as mc pygeos = pytest.importorskip("pygeos") @@ -63,6 +68,10 @@ def test_external_entropy(): numpy.testing.assert_allclose(v1, v2) numpy.testing.assert_allclose(v1, 0.3612313462) v1_ctipped = mc.external_entropy(r1, r2, base=base, balance=100) - numpy.testing.assert_allclose(v1_ctipped, 0.42275150844) #value from completeness + numpy.testing.assert_allclose( + v1_ctipped, 0.42275150844 + ) # value from completeness v1_htipped = mc.external_entropy(r1, r2, base=base, balance=-100) - numpy.testing.assert_allclose(v1_htipped, 0.31534179219) #value from homogeneity + numpy.testing.assert_allclose( + v1_htipped, 0.31534179219 + ) # value from homogeneity diff --git a/esda/tests/test_mixture_smoothing.py b/esda/tests/test_mixture_smoothing.py index e9806315..cd662342 100644 --- a/esda/tests/test_mixture_smoothing.py +++ b/esda/tests/test_mixture_smoothing.py @@ -1,11 +1,13 @@ import unittest + import numpy as np -import libpysal + from .. 
import mixture_smoothing as m_s class MS_Tester(unittest.TestCase): """Mixture_Smoothing Unit Tests""" + def setUp(self): self.e = np.array([10, 5, 12, 20]) self.b = np.array([100, 150, 80, 200]) @@ -13,29 +15,28 @@ def setUp(self): def test_NP_Mixture_Smoother(self): """Test the main class""" mix = m_s.NP_Mixture_Smoother(self.e, self.b) - np.testing.assert_array_almost_equal(mix.r, np.array( - [0.10982278, 0.03445531, 0.11018404, 0.11018604])) np.testing.assert_array_almost_equal( - mix.category, np.array([1, 0, 1, 1])) - #self.failUnless(mix.getSeed(), (np.array([ 0.5, 0.5]), np.array([ 0.03333333, + mix.r, np.array([0.10982278, 0.03445531, 0.11018404, 0.11018604]) + ) + np.testing.assert_array_almost_equal(mix.category, np.array([1, 0, 1, 1])) + # self.failUnless(mix.getSeed(), (np.array([ 0.5, 0.5]), np.array([ 0.03333333, # 0.15 ]))) left, right = mix.getSeed() np.testing.assert_array_almost_equal(left, np.array([0.5, 0.5])) - np.testing.assert_array_almost_equal( - right, np.array([0.03333333, 0.15])) + np.testing.assert_array_almost_equal(right, np.array([0.03333333, 0.15])) d = mix.mixalg() np.testing.assert_array_almost_equal( - d['mix_den'], np.array([0., 0., 0., 0.])) - np.testing.assert_array_almost_equal(d['gradient'], np.array([0.])) - np.testing.assert_array_almost_equal(d['p'], np.array([1.])) - np.testing.assert_array_almost_equal( - d['grid'], np.array([11.27659574])) - self.assertEqual(d['k'], 1) - self.assertEqual(d['accuracy'], 1.0) + d["mix_den"], np.array([0.0, 0.0, 0.0, 0.0]) + ) + np.testing.assert_array_almost_equal(d["gradient"], np.array([0.0])) + np.testing.assert_array_almost_equal(d["p"], np.array([1.0])) + np.testing.assert_array_almost_equal(d["grid"], np.array([11.27659574])) + self.assertEqual(d["k"], 1) + self.assertEqual(d["accuracy"], 1.0) left, right = mix.getRateEstimates() np.testing.assert_array_almost_equal( - left, np.array([0.0911574, 0.0911574, - 0.0911574, 0.0911574])) + left, np.array([0.0911574, 0.0911574, 0.0911574, 0.0911574]) + ) np.testing.assert_array_almost_equal(right, np.array([1, 1, 1, 1])) diff --git a/esda/tests/test_moran.py b/esda/tests/test_moran.py index 65319885..9c347539 100644 --- a/esda/tests/test_moran.py +++ b/esda/tests/test_moran.py @@ -1,11 +1,11 @@ import unittest + import libpysal -from libpysal.common import pandas, RTOL, ATOL -from .. import moran import numpy as np +from libpysal.common import ATOL, RTOL +from .. 
import moran -PANDAS_EXTINCT = pandas is None SEED = 12345 @@ -131,7 +131,6 @@ def test_Moran_Local_parallel(self): self.assertAlmostEqual(lm.z_sim[0], -0.6990291160835514) self.assertAlmostEqual(lm.p_z_sim[0], 0.24226691753791396) - @unittest.skip("This function is being deprecated in the next release.") def test_by_col(self): import pandas as pd @@ -158,28 +157,58 @@ def test_local_moments(self): permutations=0, seed=SEED, ) - + wikh_fast = moran._wikh_fast(lm.w.sparse) wikh_slow = moran._wikh_slow(lm.w.sparse) - wikh_fast_c = moran._wikh_fast(lm.w.sparse, sokal_correction=True) - wikh_slow_c = moran._wikh_slow(lm.w.sparse, sokal_correction=True) + # wikh_fast_c = moran._wikh_fast(lm.w.sparse, sokal_correction=True) + # wikh_slow_c = moran._wikh_slow(lm.w.sparse, sokal_correction=True) np.testing.assert_allclose(wikh_fast, wikh_slow, rtol=RTOL, atol=ATOL) np.testing.assert_allclose(wikh_fast, wikh_slow, rtol=RTOL, atol=ATOL) - EIc = np.array([-0.00838113, -0.0243949 , -0.07031778, - -0.21520869, -0.16547163, -0.00178435, - -0.11531888, -0.36138555, -0.05471258, -0.09413562]) - VIc = np.array([0.03636013, 0.10412408, 0.28600769, - 0.26389674, 0.21576683, 0.00779261, - 0.44633942, 0.57696508, 0.12929777, 0.3730742 ]) - - EI = -np.ones((10,))/9 - VI = np.array([0.47374172, 0.47356458, 0.47209663, - 0.15866023, 0.15972526, 0.47376436, - 0.46927721, 0.24584217, 0.26498308, 0.47077467]) - - + EIc = np.array( + [ + -0.00838113, + -0.0243949, + -0.07031778, + -0.21520869, + -0.16547163, + -0.00178435, + -0.11531888, + -0.36138555, + -0.05471258, + -0.09413562, + ] + ) + VIc = np.array( + [ + 0.03636013, + 0.10412408, + 0.28600769, + 0.26389674, + 0.21576683, + 0.00779261, + 0.44633942, + 0.57696508, + 0.12929777, + 0.3730742, + ] + ) + EI = -np.ones((10,)) / 9 + VI = np.array( + [ + 0.47374172, + 0.47356458, + 0.47209663, + 0.15866023, + 0.15972526, + 0.47376436, + 0.46927721, + 0.24584217, + 0.26498308, + 0.47077467, + ] + ) np.testing.assert_allclose(lm.EIc, EIc, rtol=RTOL, atol=ATOL) np.testing.assert_allclose(lm.VIc, VIc, rtol=RTOL, atol=ATOL) diff --git a/esda/tests/test_shape.py b/esda/tests/test_shape.py index eb3d5bbf..34feb00c 100644 --- a/esda/tests/test_shape.py +++ b/esda/tests/test_shape.py @@ -1,12 +1,12 @@ -from shapely import geometry -from numpy import testing, array import pytest +from numpy import array, testing +from shapely import geometry + +import esda pygeos = pytest.importorskip("pygeos") pytest.importorskip("numba") -from esda.shape import * - shape = array( [ @@ -31,94 +31,93 @@ def test_boundary_amplitude(): - observed = boundary_amplitude(shape) + observed = esda.shape.boundary_amplitude(shape) testing.assert_allclose(observed, 0.844527, atol=ATOL) def test_convex_hull_ratio(): - observed = convex_hull_ratio(shape) + observed = esda.shape.convex_hull_ratio(shape) testing.assert_allclose(observed, 0.7, atol=ATOL) def test_length_width_diff(): - observed = length_width_diff(shape) + observed = esda.shape.length_width_diff(shape) testing.assert_allclose(observed, 0.25, atol=ATOL) def test_radii_ratio(): - observed = radii_ratio(shape) + observed = esda.shape.radii_ratio(shape) testing.assert_allclose(observed, 0.659366, atol=ATOL) def test_diameter_ratio(): - observed = diameter_ratio(shape) + observed = esda.shape.diameter_ratio(shape) testing.assert_allclose(observed, 0.8, atol=ATOL) def test_iaq(): - observed = isoareal_quotient(shape) + observed = esda.shape.isoareal_quotient(shape) testing.assert_allclose(observed, 0.622314, atol=ATOL) def test_ipq(): - observed = 
isoperimetric_quotient(shape) + observed = esda.shape.isoperimetric_quotient(shape) testing.assert_allclose(observed, 0.387275, atol=ATOL) def test_moa(): - observed = moa_ratio(shape) + observed = esda.shape.moa_ratio(shape) testing.assert_allclose(observed, 3.249799, atol=ATOL) def test_moment_of_interia(): - observed = moment_of_inertia(shape) + observed = esda.shape.moment_of_inertia(shape) testing.assert_allclose(observed, 0.315715, atol=ATOL) def test_nmi(): - observed = nmi(shape) + observed = esda.shape.nmi(shape) testing.assert_allclose(observed, 0.487412, atol=ATOL) def test_mbc(): - observed = minimum_bounding_circle_ratio(shape) + observed = esda.shape.minimum_bounding_circle_ratio(shape) testing.assert_allclose(observed, 0.437571, atol=ATOL) def test_reflexive_angle_ratio(): - observed = reflexive_angle_ratio(shape) + observed = esda.shape.reflexive_angle_ratio(shape) testing.assert_allclose(observed, 3 / 8, atol=ATOL) def test_fractal_dimension(): r = [ - fractal_dimension(shape, support=support)[0] + esda.shape.fractal_dimension(shape, support=support)[0] for support in ("hex", "square", "circle") ] - testing.assert_allclose(r, [0.218144, -4.29504, 0.257882], atol=ATOL) def test_squareness(): - observed = squareness(shape) + observed = esda.shape.squareness(shape) testing.assert_allclose(observed, 0.493094, atol=ATOL) def test_rectangularity(): - observed = rectangularity(shape) + observed = esda.shape.rectangularity(shape) testing.assert_allclose(observed, 0.7, atol=ATOL) def test_shape_index(): - observed = shape_index(shape) + observed = esda.shape.shape_index(shape) testing.assert_allclose(observed, 0.659366, atol=ATOL) def test_equivalent_rectangular_index(): - observed = equivalent_rectangular_index(shape) + observed = esda.shape.equivalent_rectangular_index(shape) testing.assert_allclose(observed, 0.706581, atol=ATOL) def test_form_factor(): - observed = form_factor(shape, array([2])) + observed = esda.shape.form_factor(shape, array([2])) testing.assert_allclose(observed, 0.602535, atol=ATOL) diff --git a/esda/tests/test_silhouette.py b/esda/tests/test_silhouette.py index b0f9945f..56b13e77 100644 --- a/esda/tests/test_silhouette.py +++ b/esda/tests/test_silhouette.py @@ -1,124 +1,191 @@ -from .. import silhouettes -import geopandas +import unittest + import libpysal import numpy -import unittest from sklearn.metrics import pairwise +from .. import silhouettes + RTOL = libpysal.common.RTOL ATOL = libpysal.common.ATOL + class Silhouette_Tester(unittest.TestCase): def setUp(self): - self.w = libpysal.weights.lat2W(3,3, rook=False) + self.w = libpysal.weights.lat2W(3, 3, rook=False) numpy.random.seed(12345) - self.X = numpy.random.random((9,3)) - self.groups = numpy.array([[0,0,0], - [0,1,1], - [0,1,1]]).flatten() - self.precomputed = self.X @ self.X.T + self.X = numpy.random.random((9, 3)) + self.groups = numpy.array([[0, 0, 0], [0, 1, 1], [0, 1, 1]]).flatten() + self.precomputed = self.X @ self.X.T self.altmetric = pairwise.manhattan_distances def test_boundary(self): - known = numpy.array([ 0.03042804, - -0.26071434, - 0.26664155, - -0.09395997, - 0.02754655, - -0.30581229, - 0.2697089 , - -0.29993007, - 0. 
]) + known = numpy.array( + [ + 0.03042804, + -0.26071434, + 0.26664155, + -0.09395997, + 0.02754655, + -0.30581229, + 0.2697089, + -0.29993007, + 0.0, + ] + ) test = silhouettes.boundary_silhouette(self.X, self.groups, self.w) numpy.testing.assert_allclose(known, test, rtol=RTOL, atol=ATOL) - known = numpy.array([ 0.04246261, - -0.17059996, - 0.35656939, - -0.10765454, - 0.00831288, - -0.30924291, - 0.36502035, - -0.25464692, - 0. ]) - test = silhouettes.boundary_silhouette(self.X, self.groups, self.w, - metric=self.altmetric) + known = numpy.array( + [ + 0.04246261, + -0.17059996, + 0.35656939, + -0.10765454, + 0.00831288, + -0.30924291, + 0.36502035, + -0.25464692, + 0.0, + ] + ) + test = silhouettes.boundary_silhouette( + self.X, self.groups, self.w, metric=self.altmetric + ) numpy.testing.assert_allclose(known, test, rtol=RTOL, atol=ATOL) - known = numpy.array([-0.43402484, - -0.29488836, - -0.3663925 , - -0.32831205, - -0.21536904, - -0.10134236, - -0.37144783, - -0.01618446, - 0. ]) - test = silhouettes.boundary_silhouette(self.X, self.groups, self.w, - metric=self.precomputed) + known = numpy.array( + [ + -0.43402484, + -0.29488836, + -0.3663925, + -0.32831205, + -0.21536904, + -0.10134236, + -0.37144783, + -0.01618446, + 0.0, + ] + ) + test = silhouettes.boundary_silhouette( + self.X, self.groups, self.w, metric=self.precomputed + ) numpy.testing.assert_allclose(known, test, rtol=RTOL, atol=ATOL) with self.assertRaises(AssertionError): - silhouettes.boundary_silhouette(self.X, self.groups, self.w, - metric=self.precomputed - self.precomputed.mean()) + silhouettes.boundary_silhouette( + self.X, + self.groups, + self.w, + metric=self.precomputed - self.precomputed.mean(), + ) def test_path(self): - known = numpy.array([ 0.15982274, - -0.02136909, - -0.3972349 , - 0.24479121, - 0.02754655, - 0.28465546, - -0.07572727, - 0.26903733, - 0.4165144 ]) + known = numpy.array( + [ + 0.15982274, + -0.02136909, + -0.3972349, + 0.24479121, + 0.02754655, + 0.28465546, + -0.07572727, + 0.26903733, + 0.4165144, + ] + ) test = silhouettes.path_silhouette(self.X, self.groups, self.w) numpy.testing.assert_allclose(known, test, rtol=RTOL, atol=ATOL) - known = numpy.array([ 0.1520476 , - 0.0390323 , - -0.34269345, - 0.27239358, - 0.00831288, - 0.27934432, - -0.03874118, - 0.28623703, - 0.40062121]) - test = silhouettes.path_silhouette(self.X, self.groups, self.w, - metric=self.altmetric) + known = numpy.array( + [ + 0.1520476, + 0.0390323, + -0.34269345, + 0.27239358, + 0.00831288, + 0.27934432, + -0.03874118, + 0.28623703, + 0.40062121, + ] + ) + test = silhouettes.path_silhouette( + self.X, self.groups, self.w, metric=self.altmetric + ) numpy.testing.assert_allclose(known, test, rtol=RTOL, atol=ATOL) with self.assertRaises(TypeError): - silhouettes.path_silhouette(self.X, self.groups, self.w, - metric=self.precomputed) + silhouettes.path_silhouette( + self.X, self.groups, self.w, metric=self.precomputed + ) with self.assertRaises(AssertionError): - silhouettes.path_silhouette(self.X, self.groups, self.w, - metric=lambda d: -self.altmetric(d)) + silhouettes.path_silhouette( + self.X, self.groups, self.w, metric=lambda d: -self.altmetric(d) + ) def test_nearest_label(self): - known = numpy.array([1,1,1,1,0,0,1,0,0]) - test,d = silhouettes.nearest_label(self.X, self.groups, return_distance=True) + known = numpy.array([1, 1, 1, 1, 0, 0, 1, 0, 0]) + test, d = silhouettes.nearest_label(self.X, self.groups, return_distance=True) numpy.testing.assert_array_equal(known, test) - known = 
numpy.array([0,1,0,0,1,0,0,0,1]) + known = numpy.array([0, 1, 0, 0, 1, 0, 0, 0, 1]) test = silhouettes.nearest_label(self.X, self.groups, keep_self=True) numpy.testing.assert_array_equal(test, known) - knownd = numpy.array([1.05707684, - 0.74780721, - 0.88841079, - 0.71628677, - 1.25964181, - 0.5854757 , - 0.89710073, - 0.64575898, - 0.73913526]) + knownd = numpy.array( + [ + 1.05707684, + 0.74780721, + 0.88841079, + 0.71628677, + 1.25964181, + 0.5854757, + 0.89710073, + 0.64575898, + 0.73913526, + ] + ) numpy.testing.assert_allclose(d, knownd, rtol=RTOL, atol=ATOL) def test_silhouette_alist(self): - known = numpy.array([0., 0., 0.22434244, 0., - 0., 0., -0.07589293, -0.07589293, - 0., 0.41331324, 0.41331324, 0., - 0., 0.11703681, 0., 0.11703681, - 0.27065991, 0.27065991, 0.27065991, - 0.27065991, 0., 0.27065991, - 0., 0., -0.07441639, -0.07441639, - 0., 0., 0., 0., 0.41576712, - 0.41576712, -0.06657343, 0., 0., - -0.06657343, 0., 0., 0., 0.]) + known = numpy.array( + [ + 0.0, + 0.0, + 0.22434244, + 0.0, + 0.0, + 0.0, + -0.07589293, + -0.07589293, + 0.0, + 0.41331324, + 0.41331324, + 0.0, + 0.0, + 0.11703681, + 0.0, + 0.11703681, + 0.27065991, + 0.27065991, + 0.27065991, + 0.27065991, + 0.0, + 0.27065991, + 0.0, + 0.0, + -0.07441639, + -0.07441639, + 0.0, + 0.0, + 0.0, + 0.0, + 0.41576712, + 0.41576712, + -0.06657343, + 0.0, + 0.0, + -0.06657343, + 0.0, + 0.0, + 0.0, + 0.0, + ] + ) test = silhouettes.silhouette_alist(self.X, self.groups, self.w.to_adjlist()) numpy.testing.assert_allclose(known, test.silhouette, rtol=RTOL, atol=ATOL) diff --git a/esda/tests/test_smaup.py b/esda/tests/test_smaup.py index 396397a7..01467fa1 100644 --- a/esda/tests/test_smaup.py +++ b/esda/tests/test_smaup.py @@ -1,63 +1,62 @@ import unittest + import libpysal -from libpysal.common import pandas, RTOL, ATOL -from ..smaup import Smaup -from ..moran import Moran import numpy as np +from libpysal.common import ATOL, RTOL, pandas +from ..moran import Moran +from ..smaup import Smaup PANDAS_EXTINCT = pandas is None + class Smaup_Tester(unittest.TestCase): - def setup(self): - from esda.moran import Moran + def setUp(self): self.w = libpysal.io.open(libpysal.examples.get_path("stl.gal")).read() f = libpysal.io.open(libpysal.examples.get_path("stl_hom.txt")) - self.y = np.array(f.by_col['HR8893']) + self.y = np.array(f.by_col["HR8893"]) self.rho = Moran(self.y, self.w).I self.n = len(self.y) - self.k = int(self.n/2) + self.k = int(self.n / 2) def test_smaup(self): - self.setup() sm = Smaup(self.n, self.k, self.rho) self.assertAlmostEqual(sm.n, 78) self.assertAlmostEqual(sm.k, 39) self.assertAlmostEqual(sm.rho, 0.24365582621771695) - np.testing.assert_allclose(sm.smaup, 0.15221341690376405, rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(sm.smaup, 0.15221341690376405, rtol=RTOL, atol=ATOL) self.assertAlmostEqual(sm.critical_01, 0.38970613333333337) self.assertAlmostEqual(sm.critical_05, 0.3557221333333333) self.assertAlmostEqual(sm.critical_1, 0.3157950666666666) - self.assertEqual(sm.summary, 'Pseudo p-value > 0.10 (H0 is not rejected)') + self.assertEqual(sm.summary, "Pseudo p-value > 0.10 (H0 is not rejected)") def test_sids(self): - from esda import moran w = libpysal.io.open(libpysal.examples.get_path("sids2.gal")).read() f = libpysal.io.open(libpysal.examples.get_path("sids2.dbf")) SIDR = np.array(f.by_col("SIDR74")) - rho = moran.Moran(SIDR, w, two_tailed=False).I + rho = Moran(SIDR, w, two_tailed=False).I n = len(SIDR) - k = int(n/2) + k = int(n / 2) sm = Smaup(n, k, rho) - 
np.testing.assert_allclose(sm.smaup, 0.15176796553181948, rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(sm.smaup, 0.15176796553181948, rtol=RTOL, atol=ATOL) self.assertAlmostEqual(sm.critical_01, 0.23404000000000003) self.assertAlmostEqual(sm.critical_05, 0.21088) self.assertAlmostEqual(sm.critical_1, 0.18239) - self.assertEqual(sm.summary, 'Pseudo p-value > 0.10 (H0 is not rejected)') + self.assertEqual(sm.summary, "Pseudo p-value > 0.10 (H0 is not rejected)") - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_by_col(self): from libpysal.io import geotable as pdio - from esda import moran + np.random.seed(11213) - df = pdio.read_files(libpysal.examples.get_path('sids2.dbf')) + df = pdio.read_files(libpysal.examples.get_path("sids2.dbf")) w = libpysal.io.open(libpysal.examples.get_path("sids2.gal")).read() - k = int(w.n/2) - mi = moran.Moran.by_col(df, ['SIDR74'], w=w, two_tailed=False) + k = int(w.n / 2) + mi = Moran.by_col(df, ["SIDR74"], w=w, two_tailed=False) rho = np.unique(mi.SIDR74_moran.values).item() sm = Smaup(w.n, k, rho) np.testing.assert_allclose(sm.smaup, 0.15176796553181948, atol=ATOL, rtol=RTOL) self.assertAlmostEqual(sm.critical_01, 0.23404000000000003) self.assertAlmostEqual(sm.critical_05, 0.21088) self.assertAlmostEqual(sm.critical_1, 0.18239) - self.assertEqual(sm.summary, 'Pseudo p-value > 0.10 (H0 is not rejected)') + self.assertEqual(sm.summary, "Pseudo p-value > 0.10 (H0 is not rejected)") diff --git a/esda/tests/test_smoothing.py b/esda/tests/test_smoothing.py index df8fdc28..6dbffd9c 100644 --- a/esda/tests/test_smoothing.py +++ b/esda/tests/test_smoothing.py @@ -1,12 +1,15 @@ import unittest + import libpysal +import numpy as np +from libpysal.common import ATOL, RTOL, pandas from libpysal.weights.distance import KNN, Kernel + from .. 
import smoothing as sm -import numpy as np -from libpysal.common import RTOL, ATOL, pandas PANDAS_EXTINCT = pandas is None + class TestFlatten(unittest.TestCase): def setUp(self): self.input = [[1, 2], [3, 3, 4], [5, 6]] @@ -36,7 +39,7 @@ def setUp(self): self.e = np.array([30, 25, 25, 15, 33, 21, 30, 20]) self.b = np.array([1000, 1000, 1100, 900, 1000, 900, 1100, 900]) self.s_e = np.array([100, 45, 120, 100, 50, 30, 200, 80]) - self.s_b = s = np.array([1000, 900, 1000, 900, 1000, 900, 1000, 900]) + self.s_b = np.array([1000, 900, 1000, 900, 1000, 900, 1000, 900]) self.n = 2 def test_crude_age_standardization(self): @@ -45,25 +48,28 @@ def test_crude_age_standardization(self): self.assertEqual(list(crude), list(crude_exp)) def test_direct_age_standardization(self): - direct = np.array(sm.direct_age_standardization( - self.e, self.b, self.s_b, self.n)).round(8) - direct_exp = np.array([[0.02374402, 0.01920491, - 0.02904848], [0.02665072, 0.02177143, 0.03230508]]) + direct = np.array( + sm.direct_age_standardization(self.e, self.b, self.s_b, self.n) + ).round(8) + direct_exp = np.array( + [[0.02374402, 0.01920491, 0.02904848], [0.02665072, 0.02177143, 0.03230508]] + ) self.assertEqual(list(direct.flatten()), list(direct_exp.flatten())) def test_indirect_age_standardization(self): - indirect = np.array(sm.indirect_age_standardization( - self.e, self.b, self.s_e, self.s_b, self.n)).round(8) - indirect_exp = np.array([[0.02372382, 0.01940230, - 0.02900789], [0.02610803, .02154304, 0.03164035]]) - self.assertEqual( - list(indirect.flatten()), list(indirect_exp.flatten())) + indirect = np.array( + sm.indirect_age_standardization(self.e, self.b, self.s_e, self.s_b, self.n) + ).round(8) + indirect_exp = np.array( + [[0.02372382, 0.01940230, 0.02900789], [0.02610803, 0.02154304, 0.03164035]] + ) + self.assertEqual(list(indirect.flatten()), list(indirect_exp.flatten())) class TestSRate(unittest.TestCase): def setUp(self): - sids = libpysal.io.open(libpysal.examples.get_path('sids2.dbf'), 'r') - self.w = libpysal.io.open(libpysal.examples.get_path('sids2.gal'), 'r').read() + sids = libpysal.io.open(libpysal.examples.get_path("sids2.dbf"), "r") + self.w = libpysal.io.open(libpysal.examples.get_path("sids2.gal"), "r").read() self.b, self.e = np.array(sids[:, 8]), np.array(sids[:, 9]) self.er = [0.453433, 0.000000, 0.775871, 0.973810, 3.133190] self.eb = [0.0016973, 0.0017054, 0.0017731, 0.0020129, 0.0035349] @@ -71,99 +77,101 @@ def setUp(self): self.smr = [0.00083622, 0.00109402, 0.00081567, 0.0, 0.0048209] self.smr_w = [0.00127146, 0.00127146, 0.0008433, 0.0, 0.0049889] self.smr2 = [0.00091659, 0.00087641, 0.00091073, 0.0, 0.00467633] - self.s_ebr10 = np.array([4.01485749e-05, 3.62437513e-05, - 4.93034844e-05, 5.09387329e-05, 3.72735210e-05, - 3.69333797e-05, 5.40245456e-05, 2.99806055e-05, - 3.73034109e-05, 3.47270722e-05]).reshape(-1,1) - - stl_ex = libpysal.examples.load_example('stl') - self.stl = libpysal.io.open(stl_ex.get_path('stl_hom.csv'), 'r') + self.s_ebr10 = np.array( + [ + 4.01485749e-05, + 3.62437513e-05, + 4.93034844e-05, + 5.09387329e-05, + 3.72735210e-05, + 3.69333797e-05, + 5.40245456e-05, + 2.99806055e-05, + 3.73034109e-05, + 3.47270722e-05, + ] + ).reshape(-1, 1) + + stl_ex = libpysal.examples.load_example("stl") + self.stl = libpysal.io.open(stl_ex.get_path("stl_hom.csv"), "r") self.stl_e, self.stl_b = np.array(self.stl[:, 10]), np.array(self.stl[:, 13]) - self.stl_w = libpysal.io.open(stl_ex.get_path('stl.gal'), 'r').read() + self.stl_w = 
libpysal.io.open(stl_ex.get_path("stl.gal"), "r").read() if not self.stl_w.id_order_set: self.stl_w.id_order = list(range(1, len(self.stl) + 1)) if not PANDAS_EXTINCT: - self.df = libpysal.io.open(libpysal.examples.get_path('sids2.dbf')).to_df() - self.ename = 'SID74' - self.bname = 'BIR74' - self.stl_df = libpysal.io.open(libpysal.examples.get_path('stl_hom.csv')).to_df() - self.stl_ename = 'HC7984' - self.stl_bname = 'PO7984' + self.df = libpysal.io.open(libpysal.examples.get_path("sids2.dbf")).to_df() + self.ename = "SID74" + self.bname = "BIR74" + self.stl_df = libpysal.io.open( + libpysal.examples.get_path("stl_hom.csv") + ).to_df() + self.stl_ename = "HC7984" + self.stl_bname = "PO7984" def test_Excess_Risk(self): out_er = sm.Excess_Risk(self.e, self.b).r - np.testing.assert_allclose(out_er[:5].flatten(), self.er, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(out_er[:5].flatten(), self.er, rtol=RTOL, atol=ATOL) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Excess_Risk_tabular(self): out_er = sm.Excess_Risk(self.df[self.ename], self.df[self.bname]).r - np.testing.assert_allclose(out_er[:5].flatten(), self.er, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(out_er[:5].flatten(), self.er, rtol=RTOL, atol=ATOL) self.assertIsInstance(out_er, np.ndarray) out_er = sm.Excess_Risk.by_col(self.df, self.ename, self.bname) - outcol = '{}-{}_excess_risk'.format(self.ename, self.bname) + outcol = "{}-{}_excess_risk".format(self.ename, self.bname) outcol = out_er[outcol] - np.testing.assert_allclose(outcol[:5], self.er, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(outcol[:5], self.er, rtol=RTOL, atol=ATOL) self.assertIsInstance(outcol.values, np.ndarray) def test_Empirical_Bayes(self): out_eb = sm.Empirical_Bayes(self.e, self.b).r - np.testing.assert_allclose(out_eb[:5].flatten(), self.eb, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(out_eb[:5].flatten(), self.eb, rtol=RTOL, atol=ATOL) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Empirical_Bayes_tabular(self): out_eb = sm.Empirical_Bayes(self.df[self.ename], self.df[self.bname]).r - np.testing.assert_allclose(out_eb[:5].flatten(), self.eb, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(out_eb[:5].flatten(), self.eb, rtol=RTOL, atol=ATOL) self.assertIsInstance(out_eb, np.ndarray) out_eb = sm.Empirical_Bayes.by_col(self.df, self.ename, self.bname) - outcol = '{}-{}_empirical_bayes'.format(self.ename, self.bname) + outcol = "{}-{}_empirical_bayes".format(self.ename, self.bname) outcol = out_eb[outcol] - np.testing.assert_allclose(outcol[:5], self.eb, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(outcol[:5], self.eb, rtol=RTOL, atol=ATOL) self.assertIsInstance(outcol.values, np.ndarray) def test_Spatial_Empirical_Bayes(self): s_eb = sm.Spatial_Empirical_Bayes(self.stl_e, self.stl_b, self.stl_w) - np.testing.assert_allclose(self.s_ebr10, s_eb.r[:10], - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(self.s_ebr10, s_eb.r[:10], rtol=RTOL, atol=ATOL) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Spatial_Empirical_Bayes_tabular(self): - s_eb = sm.Spatial_Empirical_Bayes(self.stl_df[self.stl_ename], - self.stl_df[self.stl_bname], - self.stl_w).r + s_eb = sm.Spatial_Empirical_Bayes( + self.stl_df[self.stl_ename], self.stl_df[self.stl_bname], self.stl_w + ).r self.assertIsInstance(s_eb, np.ndarray) 
np.testing.assert_allclose(self.s_ebr10, s_eb[:10]) - s_eb = sm.Spatial_Empirical_Bayes.by_col(self.stl_df, - self.stl_ename, - self.stl_bname, - self.stl_w) - outcol = '{}-{}_spatial_empirical_bayes'.format(self.stl_ename, self.stl_bname) + s_eb = sm.Spatial_Empirical_Bayes.by_col( + self.stl_df, self.stl_ename, self.stl_bname, self.stl_w + ) + outcol = "{}-{}_spatial_empirical_bayes".format(self.stl_ename, self.stl_bname) r = s_eb[outcol].values self.assertIsInstance(r, np.ndarray) - np.testing.assert_allclose(self.s_ebr10, r[:10].reshape(-1,1)) + np.testing.assert_allclose(self.s_ebr10, r[:10].reshape(-1, 1)) def test_Spatial_Rate(self): out_sr = sm.Spatial_Rate(self.e, self.b, self.w).r - np.testing.assert_allclose(out_sr[:5].flatten(), self.sr, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(out_sr[:5].flatten(), self.sr, rtol=RTOL, atol=ATOL) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Spatial_Rate_tabular(self): out_sr = sm.Spatial_Rate(self.df[self.ename], self.df[self.bname], self.w).r - np.testing.assert_allclose(out_sr[:5].flatten(), self.sr, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(out_sr[:5].flatten(), self.sr, rtol=RTOL, atol=ATOL) self.assertIsInstance(out_sr, np.ndarray) - out_sr = sm.Spatial_Rate.by_col(self.df, self.ename, self.bname,w=self.w) - outcol = '{}-{}_spatial_rate'.format(self.ename, self.bname) + out_sr = sm.Spatial_Rate.by_col(self.df, self.ename, self.bname, w=self.w) + outcol = "{}-{}_spatial_rate".format(self.ename, self.bname) r = out_sr[outcol].values self.assertIsInstance(r, np.ndarray) np.testing.assert_allclose(r[:5], self.sr, rtol=RTOL, atol=ATOL) @@ -172,129 +180,139 @@ def test_Spatial_Median_Rate(self): out_smr = sm.Spatial_Median_Rate(self.e, self.b, self.w).r out_smr_w = sm.Spatial_Median_Rate(self.e, self.b, self.w, aw=self.b).r out_smr2 = sm.Spatial_Median_Rate(self.e, self.b, self.w, iteration=2).r - np.testing.assert_allclose(out_smr[:5].flatten(), self.smr, - atol=ATOL, rtol=RTOL) - np.testing.assert_allclose(out_smr_w[:5].flatten(), self.smr_w, - atol=ATOL, rtol=RTOL) - np.testing.assert_allclose(out_smr2[:5].flatten(), self.smr2, - atol=ATOL, rtol=RTOL) - - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + np.testing.assert_allclose( + out_smr[:5].flatten(), self.smr, atol=ATOL, rtol=RTOL + ) + np.testing.assert_allclose( + out_smr_w[:5].flatten(), self.smr_w, atol=ATOL, rtol=RTOL + ) + np.testing.assert_allclose( + out_smr2[:5].flatten(), self.smr2, atol=ATOL, rtol=RTOL + ) + + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Spatial_Median_Rate_tabular(self): - out_smr = sm.Spatial_Median_Rate(self.df[self.ename], - self.df[self.bname], - self.w).r - out_smr_w = sm.Spatial_Median_Rate(self.df[self.ename], - self.df[self.bname], - self.w, - aw = self.df[self.bname]).r - out_smr2 = sm.Spatial_Median_Rate(self.df[self.ename], - self.df[self.bname], - self.w, - iteration=2).r + out_smr = sm.Spatial_Median_Rate( + self.df[self.ename], self.df[self.bname], self.w + ).r + out_smr_w = sm.Spatial_Median_Rate( + self.df[self.ename], self.df[self.bname], self.w, aw=self.df[self.bname] + ).r + out_smr2 = sm.Spatial_Median_Rate( + self.df[self.ename], self.df[self.bname], self.w, iteration=2 + ).r self.assertIsInstance(out_smr, np.ndarray) self.assertIsInstance(out_smr_w, np.ndarray) self.assertIsInstance(out_smr2, np.ndarray) - np.testing.assert_allclose(out_smr[:5].flatten(), self.smr, - atol=ATOL, rtol=RTOL) - 
np.testing.assert_allclose(out_smr_w[:5].flatten(), self.smr_w, - atol=ATOL, rtol=RTOL) - np.testing.assert_allclose(out_smr2[:5].flatten(), self.smr2, - atol=ATOL, rtol=RTOL) - - out_smr = sm.Spatial_Median_Rate.by_col(self.df, self.ename, - self.bname, self.w) - out_smr_w = sm.Spatial_Median_Rate.by_col(self.df, self.ename, - self.bname, self.w, - aw = self.df[self.bname]) - out_smr2 = sm.Spatial_Median_Rate.by_col(self.df, self.ename, - self.bname, self.w, - iteration=2) - outcol = '{}-{}_spatial_median_rate'.format(self.ename, self.bname) - - np.testing.assert_allclose(out_smr[outcol].values[:5], self.smr, - rtol=RTOL, atol=ATOL) - np.testing.assert_allclose(out_smr_w[outcol].values[:5], self.smr_w, - rtol=RTOL, atol=ATOL) - np.testing.assert_allclose(out_smr2[outcol].values[:5], self.smr2, - rtol=RTOL, atol=ATOL) - - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + np.testing.assert_allclose( + out_smr[:5].flatten(), self.smr, atol=ATOL, rtol=RTOL + ) + np.testing.assert_allclose( + out_smr_w[:5].flatten(), self.smr_w, atol=ATOL, rtol=RTOL + ) + np.testing.assert_allclose( + out_smr2[:5].flatten(), self.smr2, atol=ATOL, rtol=RTOL + ) + + out_smr = sm.Spatial_Median_Rate.by_col(self.df, self.ename, self.bname, self.w) + out_smr_w = sm.Spatial_Median_Rate.by_col( + self.df, self.ename, self.bname, self.w, aw=self.df[self.bname] + ) + out_smr2 = sm.Spatial_Median_Rate.by_col( + self.df, self.ename, self.bname, self.w, iteration=2 + ) + outcol = "{}-{}_spatial_median_rate".format(self.ename, self.bname) + + np.testing.assert_allclose( + out_smr[outcol].values[:5], self.smr, rtol=RTOL, atol=ATOL + ) + np.testing.assert_allclose( + out_smr_w[outcol].values[:5], self.smr_w, rtol=RTOL, atol=ATOL + ) + np.testing.assert_allclose( + out_smr2[outcol].values[:5], self.smr2, rtol=RTOL, atol=ATOL + ) + + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Spatial_Smoother_multicol(self): """ test that specifying multiple columns works correctly. Since the function is shared over all spatial smoothers, we can only test one. 
""" - enames = [self.ename, 'SID79'] - bnames = [self.bname, 'BIR79'] + enames = [self.ename, "SID79"] + bnames = [self.bname, "BIR79"] out_df = sm.Spatial_Median_Rate.by_col(self.df, enames, bnames, self.w) - outcols = ['{}-{}_spatial_median_rate'.format(e,b) - for e,b in zip(enames, bnames)] - smr79 = np.array([0.00122129, 0.00176924, 0.00176924, - 0.00240964, 0.00272035]) + outcols = [ + "{}-{}_spatial_median_rate".format(e, b) for e, b in zip(enames, bnames) + ] + smr79 = np.array( + [0.00122129, 0.00176924, 0.00176924, 0.00240964, 0.00272035] + ) answers = [self.smr, smr79] for col, answer in zip(outcols, answer): self.assertIn(out_df.columns, col) - np.testing.assert_allclose(out_df[col].values[:5], answer, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose( + out_df[col].values[:5], answer, rtol=RTOL, atol=ATOL + ) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Smoother_multicol(self): """ test that non-spatial smoothers work with multicolumn queries """ - enames = [self.ename, 'SID79'] - bnames = [self.bname, 'BIR79'] + enames = [self.ename, "SID79"] + bnames = [self.bname, "BIR79"] out_df = sm.Excess_Risk.by_col(self.df, enames, bnames) - outcols = ['{}-{}_excess_risk'.format(e,b) - for e,b in zip(enames, bnames)] - er79 = np.array([0.000000, 2.796607, 0.8383863, - 1.217479, 0.943811]) + outcols = ["{}-{}_excess_risk".format(e, b) for e, b in zip(enames, bnames)] + er79 = np.array([0.000000, 2.796607, 0.8383863, 1.217479, 0.943811]) answers = [self.er, er79] for col, answer in zip(outcols, answer): self.assertIn(out_df.columns, col) - np.testing.assert_allclose(out_df[col].values[:5], answer, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose( + out_df[col].values[:5], answer, rtol=RTOL, atol=ATOL + ) + class TestHB(unittest.TestCase): def setUp(self): - sids = libpysal.io.open(libpysal.examples.get_path('sids2.shp'), 'r') + sids = libpysal.io.open(libpysal.examples.get_path("sids2.shp"), "r") self.sids = sids self.d = np.array([i.centroid for i in sids]) self.w = KNN.from_array(self.d, k=5) if not self.w.id_order_set: self.w.id_order = self.w.id_order - sids_db = libpysal.io.open(libpysal.examples.get_path('sids2.dbf'), 'r') + sids_db = libpysal.io.open(libpysal.examples.get_path("sids2.dbf"), "r") self.b, self.e = np.array(sids_db[:, 8]), np.array(sids_db[:, 9]) - self.sids_hb_rr5 = np.array([0.00075586, 0., - 0.0008285, 0.0018315, 0.00498891]) - self.sids_hb_r2r5 = np.array([0.0008285, 0.00084331, - 0.00086896, 0.0018315, 0.00498891]) - self.sids_hb_r3r5 = np.array([0.00091659, 0., - 0.00156838, 0.0018315, 0.00498891]) + self.sids_hb_rr5 = np.array([0.00075586, 0.0, 0.0008285, 0.0018315, 0.00498891]) + self.sids_hb_r2r5 = np.array( + [0.0008285, 0.00084331, 0.00086896, 0.0018315, 0.00498891] + ) + self.sids_hb_r3r5 = np.array( + [0.00091659, 0.0, 0.00156838, 0.0018315, 0.00498891] + ) if not PANDAS_EXTINCT: self.df = sids_db.to_df() - self.ename = 'SID74' - self.bname = 'BIR74' - self.enames = [self.ename, 'SID79'] - self.bnames = [self.bname, 'BIR79'] - self.sids79r = np.array([.000563, .001659, .001879, - .002410, .002720]) - @unittest.skip('Deprecated') + self.ename = "SID74" + self.bname = "BIR74" + self.enames = [self.ename, "SID79"] + self.bnames = [self.bname, "BIR79"] + self.sids79r = np.array([0.000563, 0.001659, 0.001879, 0.002410, 0.002720]) + + @unittest.skip("Deprecated") def test_Headbanging_Triples(self): ht = sm.Headbanging_Triples(self.d, self.w) self.assertEqual(len(ht.triples), 
len(self.d)) ht2 = sm.Headbanging_Triples(self.d, self.w, edgecor=True) - self.assertTrue(hasattr(ht2, 'extra')) + self.assertTrue(hasattr(ht2, "extra")) self.assertEqual(len(ht2.triples), len(self.d)) htr = sm.Headbanging_Median_Rate(self.e, self.b, ht2, iteration=5) self.assertEqual(len(htr.r), len(self.e)) for i in htr.r: self.assertTrue(i is not None) - @unittest.skip('Deprecated') + @unittest.skip("Deprecated") def test_Headbanging_Median_Rate(self): s_ht = sm.Headbanging_Triples(self.d, self.w, k=5) sids_hb_r = sm.Headbanging_Median_Rate(self.e, self.b, s_ht) @@ -304,39 +322,45 @@ def test_Headbanging_Median_Rate(self): sids_hb_r3 = sm.Headbanging_Median_Rate(self.e, self.b, s_ht, aw=self.b) np.testing.assert_array_almost_equal(self.sids_hb_r3r5, sids_hb_r3.r[:5]) - #@unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') - @unittest.skip('Deprecated') + # @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skip("Deprecated") def test_Headbanging_Median_Rate_tabular(self): # test that dataframe columns are treated correctly s_ht = sm.Headbanging_Triples(self.d, self.w, k=5) - sids_hb_r = sm.Headbanging_Median_Rate(self.df[self.ename], - self.df[self.bname], s_ht) + sids_hb_r = sm.Headbanging_Median_Rate( + self.df[self.ename], self.df[self.bname], s_ht + ) self.assertIsInstance(sids_hb_r.r, np.ndarray) np.testing.assert_array_almost_equal(self.sids_hb_rr5, sids_hb_r.r[:5]) - sids_hb_r2 = sm.Headbanging_Median_Rate(self.df[self.ename], - self.df[self.bname], s_ht, - iteration=5) + sids_hb_r2 = sm.Headbanging_Median_Rate( + self.df[self.ename], self.df[self.bname], s_ht, iteration=5 + ) self.assertIsInstance(sids_hb_r2.r, np.ndarray) np.testing.assert_array_almost_equal(self.sids_hb_r2r5, sids_hb_r2.r[:5]) - sids_hb_r3 = sm.Headbanging_Median_Rate(self.df[self.ename], - self.df[self.bname], s_ht, - aw=self.df[self.bname]) + sids_hb_r3 = sm.Headbanging_Median_Rate( + self.df[self.ename], self.df[self.bname], s_ht, aw=self.df[self.bname] + ) self.assertIsInstance(sids_hb_r3.r, np.ndarray) np.testing.assert_array_almost_equal(self.sids_hb_r3r5, sids_hb_r3.r[:5]) - #test that the by col on multiple names works correctly - sids_hr_df = sm.Headbanging_Median_Rate.by_col(self.df, self.enames, - self.bnames, w=self.w) - outcols = ['{}-{}_headbanging_median_rate'.format(e,b) for e,b in - zip(self.enames, self.bnames)] + # test that the by col on multiple names works correctly + sids_hr_df = sm.Headbanging_Median_Rate.by_col( + self.df, self.enames, self.bnames, w=self.w + ) + outcols = [ + "{}-{}_headbanging_median_rate".format(e, b) + for e, b in zip(self.enames, self.bnames) + ] for col, answer in zip(outcols, [self.sids_hb_rr5, self.sids79r]): this_col = sids_hr_df[col].values self.assertIsInstance(this_col, np.ndarray) - np.testing.assert_allclose(sids_hr_df[col][:5], answer, - rtol=RTOL, atol=ATOL*10) + np.testing.assert_allclose( + sids_hr_df[col][:5], answer, rtol=RTOL, atol=ATOL * 10 + ) + class TestKernel_AgeAdj_SM(unittest.TestCase): def setUp(self): @@ -345,8 +369,7 @@ def setUp(self): self.e1 = np.array([10, 8, 1, 4, 3, 5, 4, 3, 2, 1, 5, 3]) self.b1 = np.array([100, 90, 15, 30, 25, 20, 30, 20, 80, 80, 90, 60]) self.s = np.array([98, 88, 15, 29, 20, 23, 33, 25, 76, 80, 89, 66]) - self.points = [( - 10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)] + self.points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)] self.kw = Kernel(self.points) self.points1 = np.array(self.points) + 5 self.points1 = np.vstack((self.points1, self.points)) @@ -355,43 +378,88 @@ 
def setUp(self): self.kw.id_order = list(range(0, len(self.points))) if not PANDAS_EXTINCT: import pandas as pd + dfa = np.array([self.e, self.b]).T dfb = np.array([self.e1, self.b1, self.s]).T - self.dfa = pd.DataFrame(dfa, columns=['e','b']) - self.dfb = pd.DataFrame(dfb, columns=['e', 'b', 's']) - - #answers - self.kernel_exp = [0.10543301, 0.0858573, 0.08256196, 0.09884584, - 0.04756872, 0.04845298] - self.ageadj_exp = [0.10519625, 0.08494318, 0.06440072, 0.06898604, - 0.06952076, 0.05020968] - self.disk_exp = [0.12222222000000001, 0.10833333, 0.08055556, - 0.08944444, 0.09944444, 0.09351852] - self.sf_exp = np.array([ 0.111111, 0.111111, 0.085106, 0.076923]) + self.dfa = pd.DataFrame(dfa, columns=["e", "b"]) + self.dfb = pd.DataFrame(dfb, columns=["e", "b", "s"]) + + # answers + self.kernel_exp = [ + 0.10543301, + 0.0858573, + 0.08256196, + 0.09884584, + 0.04756872, + 0.04845298, + ] + self.ageadj_exp = [ + 0.10519625, + 0.08494318, + 0.06440072, + 0.06898604, + 0.06952076, + 0.05020968, + ] + self.disk_exp = [ + 0.12222222000000001, + 0.10833333, + 0.08055556, + 0.08944444, + 0.09944444, + 0.09351852, + ] + self.sf_exp = np.array([0.111111, 0.111111, 0.085106, 0.076923]) def test_Kernel_Smoother(self): kr = sm.Kernel_Smoother(self.e, self.b, self.kw) np.testing.assert_allclose(kr.r.flatten(), self.kernel_exp) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Kernel_Smoother_tabular(self): dfa, dfb = self.dfa, self.dfb - kr = sm.Kernel_Smoother(dfa['e'], dfa['b'], self.kw) + kr = sm.Kernel_Smoother(dfa["e"], dfa["b"], self.kw) np.testing.assert_allclose(kr.r.flatten(), kernel_exp) - kr = sm.Kernel_Smoother.by_col(dfa, 'e', 'b', w=self.kw) - colname = 'e_b_kernel_smoother' + kr = sm.Kernel_Smoother.by_col(dfa, "e", "b", w=self.kw) + colname = "e_b_kernel_smoother" np.testing.assert_allclose(kr[colname].values, kernel_exp) - kr = sm.Kernel_Smoother.by_col(dfb, ['e', 's'], 'b', w=self.kw) - outcols = ['{}-b_kernel_smoother'.format(l) for l in ['e','s']] - - exp_eb = np.array([ 0.08276363, 0.08096262, 0.03636364, 0.0704302 , - 0.07996067, 0.1287226 , 0.09831286, 0.0952105 , - 0.02857143, 0.06671039, 0.07129231, 0.08078792]) - exp_sb = np.array([ 1.00575463, 0.99597005, 0.96363636, 0.99440132, - 0.98468399, 1.07912333, 1.03376267, 1.02759815, - 0.95428572, 0.99716186, 0.98277235, 1.03906155]) + kr = sm.Kernel_Smoother.by_col(dfb, ["e", "s"], "b", w=self.kw) + outcols = ["{}-b_kernel_smoother".format(l) for l in ["e", "s"]] # noqa E741 + + exp_eb = np.array( + [ + 0.08276363, + 0.08096262, + 0.03636364, + 0.0704302, + 0.07996067, + 0.1287226, + 0.09831286, + 0.0952105, + 0.02857143, + 0.06671039, + 0.07129231, + 0.08078792, + ] + ) + exp_sb = np.array( + [ + 1.00575463, + 0.99597005, + 0.96363636, + 0.99440132, + 0.98468399, + 1.07912333, + 1.03376267, + 1.02759815, + 0.95428572, + 0.99716186, + 0.98277235, + 1.03906155, + ] + ) for name, answer in zip(outcols, [exp_eb, exp_sb]): np.testing.assert_allclose(kr[name].values, answer, rtol=RTOL, atol=ATOL) @@ -399,35 +467,37 @@ def test_Age_Adjusted_Smoother(self): ar = sm.Age_Adjusted_Smoother(self.e1, self.b1, self.kw, self.s) np.testing.assert_allclose(ar.r, self.ageadj_exp) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Age_Adjusted_Smoother_tabular(self): dfb = self.dfb kr = sm.Age_Adjusted_Smoother(dfb.e, dfb.b, s=dfb.s, w=self.kw) self.assertIsInstance(kr.r, np.ndarray) np.testing.assert_allclose(kr.r, 
self.ageadj_exp) - kr = sm.Age_Adjusted_Smoother.by_col(dfb, 'e', 'b', w=self.kw, s='s') - answer = np.array([ 0.10519625, 0.08494318, 0.06440072, - 0.06898604, 0.06952076, 0.05020968]) - colname = 'e-b_age_adjusted_smoother' + kr = sm.Age_Adjusted_Smoother.by_col(dfb, "e", "b", w=self.kw, s="s") + answer = np.array( + [0.10519625, 0.08494318, 0.06440072, 0.06898604, 0.06952076, 0.05020968] + ) + colname = "e-b_age_adjusted_smoother" np.testing.assert_allclose(kr[colname].values, answer, rtol=RTOL, atol=ATOL) def test_Disk_Smoother(self): - self.kw.transform = 'b' + self.kw.transform = "b" disk = sm.Disk_Smoother(self.e, self.b, self.kw) np.testing.assert_allclose(disk.r.flatten(), self.disk_exp) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Disk_Smoother_tabular(self): - self.kw.transform = 'b' + self.kw.transform = "b" dfa = self.dfa disk = sm.Disk_Smoother(dfa.e, dfa.b, self.kw).r np.testing.assert_allclose(disk.flatten(), self.disk_exp) - disk = sm.Disk_Smoother.by_col(dfa, 'e', 'b', self.kw) - col = 'e-b_disk_smoother' - np.testing.assert_allclose(disk[col].values, self.disk_exp, - rtol=RTOL, atol=ATOL) + disk = sm.Disk_Smoother.by_col(dfa, "e", "b", self.kw) + col = "e-b_disk_smoother" + np.testing.assert_allclose( + disk[col].values, self.disk_exp, rtol=RTOL, atol=ATOL + ) def test_Spatial_Filtering(self): points = np.array(self.points) @@ -435,35 +505,56 @@ def test_Spatial_Filtering(self): sf = sm.Spatial_Filtering(bbox, points, self.e, self.b, 2, 2, r=30) np.testing.assert_allclose(sf.r, self.sf_exp, rtol=RTOL, atol=ATOL) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Kernel_Smoother_tabular(self): point_array = np.array(self.points) - bbox = [[0,0] , [45,45]] + bbox = [[0, 0], [45, 45]] dfa = self.dfa sf = sm.Spatial_Filtering(bbox, point_array, dfa.e, dfa.b, 2, 2, r=30) np.testing.assert_allclose(sf.r, self.sf_exp, rtol=RTOL, atol=ATOL) - dfa['geometry'] = self.points - sf = sm.Spatial_Filtering.by_col(dfa, 'e', 'b', 3, 3, r=30) - r_answer = np.array([ 0.07692308, 0.07213115, 0.07213115, 0.07692308, - 0.07692308, 0.07692308, 0.07692308, 0.07692308, - 0.07692308]) + dfa["geometry"] = self.points + sf = sm.Spatial_Filtering.by_col(dfa, "e", "b", 3, 3, r=30) + r_answer = np.array( + [ + 0.07692308, + 0.07213115, + 0.07213115, + 0.07692308, + 0.07692308, + 0.07692308, + 0.07692308, + 0.07692308, + 0.07692308, + ] + ) x_answer = np.array([10.0, 10.0, 10.0, 20.0, 20.0, 20.0, 30.0, 30.0, 30.0]) - y_answer = np.array([10.000000, 16.666667, 23.333333, - 10.000000, 16.666667, 23.333333, - 10.000000, 16.666667, 23.333333]) - columns = ['e-b_spatial_filtering_{}'.format(name) for name in ['X', 'Y', 'R']] + y_answer = np.array( + [ + 10.000000, + 16.666667, + 23.333333, + 10.000000, + 16.666667, + 23.333333, + 10.000000, + 16.666667, + 23.333333, + ] + ) + columns = ["e-b_spatial_filtering_{}".format(name) for name in ["X", "Y", "R"]] for col, answer in zip(columns, [x_answer, y_answer, r_answer]): np.testing.assert_allclose(sf[col].values, answer, rtol=RTOL, atol=ATOL) + class TestUtils(unittest.TestCase): def test_sum_by_n(self): d = np.array([10, 9, 20, 30]) w = np.array([0.5, 0.1, 0.3, 0.8]) n = 2 - exp_sum = np.array([5.9, 30.]) + exp_sum = np.array([5.9, 30.0]) np.testing.assert_array_almost_equal(exp_sum, sm.sum_by_n(d, w, n)) def test_standardized_mortality_ratio(self): @@ -473,8 +564,9 @@ def test_standardized_mortality_ratio(self): s_b = 
np.array([1000, 900, 1000, 900, 1000, 900, 1000, 900]) n = 2 exp_smr = np.array([2.48691099, 2.73684211]) - np.testing.assert_array_almost_equal(exp_smr, - sm.standardized_mortality_ratio(e, b, s_e, s_b, n)) + np.testing.assert_array_almost_equal( + exp_smr, sm.standardized_mortality_ratio(e, b, s_e, s_b, n) + ) def test_choynowski(self): e = np.array([30, 25, 25, 15, 33, 21, 30, 20]) @@ -486,19 +578,24 @@ def test_choynowski(self): def test_assuncao_rate(self): e = np.array([30, 25, 25, 15, 33, 21, 30, 20]) b = np.array([100, 100, 110, 90, 100, 90, 110, 90]) - exp_assuncao = np.array([1.03843594, -0.04099089, -0.56250375, - -1.73061861]) - np.testing.assert_array_almost_equal( - exp_assuncao, sm.assuncao_rate(e, b)[:4]) + exp_assuncao = np.array([1.03843594, -0.04099089, -0.56250375, -1.73061861]) + np.testing.assert_array_almost_equal(exp_assuncao, sm.assuncao_rate(e, b)[:4]) suite = unittest.TestSuite() -test_classes = [TestFlatten, TestWMean, TestAgeStd, TestSRate, TestHB, - TestKernel_AgeAdj_SM, TestUtils] +test_classes = [ + TestFlatten, + TestWMean, + TestAgeStd, + TestSRate, + TestHB, + TestKernel_AgeAdj_SM, + TestUtils, +] for i in test_classes: a = unittest.TestLoader().loadTestsFromTestCase(i) suite.addTest(a) -if __name__ == '__main__': +if __name__ == "__main__": runner = unittest.TextTestRunner() runner.run(suite) diff --git a/esda/tests/test_topo.py b/esda/tests/test_topo.py index b2bfe95a..f4c002ca 100644 --- a/esda/tests/test_topo.py +++ b/esda/tests/test_topo.py @@ -1,7 +1,9 @@ -import numpy, copy, pandas from unittest import TestCase -from ..topo import prominence, isolation, to_elevation, weights +import numpy +import pandas + +from ..topo import isolation, prominence, to_elevation, weights class TopoTester(TestCase): @@ -71,8 +73,9 @@ def test_isolation_valid(self): assert numpy.isnan(iso.loc[4, "parent_rank"]) assert (iso.dropna().parent_index == 4).all() assert ( - iso.sort_values("marks", ascending=False).index - == iso.sort_values("rank").index + iso.sort_values("marks", ascending=False).index == ( + iso.sort_values("rank").index + ) ).all() assert iso.loc[3, "isolation"] == 1.5 assert iso.loc[2, "gap"] == ( @@ -88,8 +91,9 @@ def test_isolation_valid(self): assert numpy.isnan(iso.loc[3, "parent_index"]) assert (iso.dropna().parent_index == [4, 2, 5, 5, 3]).all() assert ( - iso.sort_values("marks", ascending=False).index - == iso.sort_values("rank").index + iso.sort_values("marks", ascending=False).index == ( + iso.sort_values("rank").index + ) ).all() assert iso.loc[1, "isolation"] == 1 assert iso.loc[2, "gap"] == ( diff --git a/esda/tests/test_util.py b/esda/tests/test_util.py index 97e3410c..88d4035d 100644 --- a/esda/tests/test_util.py +++ b/esda/tests/test_util.py @@ -1,9 +1,10 @@ import unittest + import libpysal -from .. import util -from .. import moran import numpy as np +from .. 
import moran, util + class Fdr_Tester(unittest.TestCase): def setUp(self): diff --git a/esda/topo.py b/esda/topo.py index ff99b9d9..1c96b41d 100644 --- a/esda/topo.py +++ b/esda/topo.py @@ -1,11 +1,12 @@ -import numpy, pandas +import numpy +import pandas +from libpysal import weights from scipy.spatial import distance -from sklearn.utils import check_array from scipy.stats import mode as most_common_value -from libpysal import weights +from sklearn.utils import check_array try: - from tqdm import tqdm as _tqdm + from tqdm import tqdm HAS_TQDM = True except ImportError: @@ -18,27 +19,25 @@ def _passthrough(sequence): def _resolve_metric(X, coordinates, metric): """ - Provide a distance function that you can use to find the distance betwen arbitrary points. + Provide a distance function that you can use + to find the distance between arbitrary points. """ if callable(metric): distance_func = metric elif metric.lower() == "haversine": try: from numba import autojit - except: + except ImportError: def autojit(func): return func @autojit def harcdist(p1, p2): - """ Compute the kernel of haversine""" + """Compute the kernel of haversine""" x = numpy.sin(p2[1] - p1[1] / 2) ** 2 - y = ( - numpy.cos(p2[1]) - * numpy.cos(p1[1]) - * numpy.sin((p2[0] - p1[0]) / 2) ** 2 - ) + cosp1, cosp2 = numpy.cos(p1[1]), numpy.cos(p2[1]) + y = cosp2 * cosp1 * numpy.sin((p2[0] - p1[0]) / 2) ** 2 return 2 * numpy.arcsin(numpy.sqrt(x + y)) distance_func = harcdist @@ -54,7 +53,7 @@ def harcdist(p1, p2): ) def lookup_distance(a, b): - """ Find location of points a,b in X and return precomputed distances""" + """Find location of points a,b in X and return precomputed distances""" (aloc,) = (X == a).all(axis=1).nonzero() (bloc,) = (X == b).all(axis=1).nonzero() if (len(aloc) > 1) or (len(bloc) > 1): @@ -99,8 +98,9 @@ def isolation( (N, p) array of data to use as input. If p > 1, the "elevation" is computed using the topo.to_elevation function. coordinates : numpy.ndarray - (N,k) array of locations for X to compute distances. If metric='precomputed', this - should contain the distances from each point to every other point, and k == N. + (N,k) array of locations for X to compute distances. If + metric='precomputed', this should contain the distances from + each point to every other point, and k == N. metric : string or callable (default: 'euclidean') name of distance metric in scipy.spatial.distance, or function, that can be used to compute distances between locations. If 'precomputed', ad-hoc function @@ -134,10 +134,7 @@ def isolation( if progressbar and HAS_TQDM: pbar = tqdm elif progressbar and (not HAS_TQDM): - try: - import tqdm - except ImportError as e: - raise ImportError("the tqdm module is required for progressbars") + raise ImportError("The `tqdm` module is required for progressbars.") else: pbar = _passthrough @@ -202,8 +199,8 @@ def prominence( Returns ------- - the prominence of each observation in X, possibly along with the set of saddle points, - peaks, and/or dominating peak tree. + the prominence of each observation in X, possibly along with the + set of saddle points, peaks, and/or dominating peak tree.
Notes ----- @@ -242,10 +239,7 @@ def prominence( if progressbar and HAS_TQDM: pbar = tqdm elif progressbar and (not HAS_TQDM): - try: - import tqdm - except ImportError as e: - raise ImportError("the tqdm module is required for progressbars") + raise ImportError("The `tqdm` module is required for progressbars.") else: pbar = _passthrough @@ -260,9 +254,10 @@ def prominence( msg = "assessing {} (rank: {}, value: {})".format(this_full_ix, rank, value) # use the dominating_peak vector. A new obs either has: - # 1. neighbors whose dominating_peak are all -1 (new peak) - # 2. neighbors whose dominating_peak are all -1 or an integer (slope of current peak) - # 3. neighbors whose dominating_peak include at least two integers and any -1 (key col) + # Neighbors whose dominating_peak... + # 1. ... are all -1 (new peak) + # 2. ... are all -1 or an integer (slope of current peak) + # 3. ... include at least two integers and any -1 (key col) _, neighbs = connectivity[this_full_ix].toarray().nonzero() this_preds = predecessors[neighbs] @@ -336,23 +331,24 @@ def prominence( if verbose: print( - "--------------------------------------------\nat the {}" - " iteration:\n{}\n\tpeaks\t{}\n\tprominence\t{}\n\tkey_cols\t{}\n" - .format(rank, msg, peaks, prominence, key_cols) + ( + "--------------------------------------------\n" + f"at the {rank} iteration:\n{msg}\n\tpeaks\t{peaks}\n" + f"\tprominence\t{prominence}\n\tkey_cols\t{key_cols}\n" + ) ) if gdf is not None: peakframe = gdf.iloc[peaks] keycolframe = gdf.iloc[list(key_cols.values())] - slopeframe = gdf[ - (~(gdf.index.isin(peakframe.index) | gdf.index.isin(keycolframe.index))) - & mask - ] + _isin_peak = gdf.index.isin(peakframe.index) + _isin_keycol = gdf.index.isin(keycolframe.index) + slopeframe = gdf[~(_isin_peak | _isin_keycol) & mask] rest = gdf[~mask] this_geom = gdf.iloc[[this_full_ix]] - ax = rest.plot(edgecolor="k", linewidth=0.1, facecolor="lightblue") - ax = slopeframe.plot(edgecolor="k", linewidth=0.1, facecolor="linen", ax=ax) - ax = keycolframe.plot(edgecolor="k", linewidth=0.1, facecolor="red", ax=ax) - ax = peakframe.plot(edgecolor="k", linewidth=0.1, facecolor="yellow", ax=ax) + ax = rest.plot(edgecolor="k", lw=0.1, fc="lightblue") + ax = slopeframe.plot(ec="k", lw=0.1, fc="linen", ax=ax) + ax = keycolframe.plot(ec="k", lw=0.1, fc="red", ax=ax) + ax = peakframe.plot(ec="k", lw=0.1, fc="yellow", ax=ax) + ax = this_geom.centroid.plot(ax=ax, color="orange", marker="*") plt.show() command = input() @@ -391,14 +387,16 @@ def to_elevation(X, middle="mean", metric="euclidean"): X : numpy.ndarray Array of values for which to compute elevation. middle : callable or string - name of function in numpy (or function itself) used to compute the center point of X + name of function in numpy (or function itself) + used to compute the center point of X metric : string - metric to use in `scipy.spatial.distance.cdist` to compute the distance from the center - of mass to the point. + metric to use in `scipy.spatial.distance.cdist` to + compute the distance from the center of mass to the point. Returns -------- - (N,1)-shaped numpy array containing the "elevation" of each point relative to sea level (zero). + (N,1)-shaped numpy array containing the "elevation" of + each point relative to sea level (zero). """ if X.ndim == 1: @@ -429,7 +427,7 @@ def _check_connectivity(connectivity_or_coordinates): 3. a set of coordinates that we need to build the delaunay triangulation and then return the graph.
""" - from scipy.sparse import issparse, csc_matrix + from scipy.sparse import issparse if issparse(connectivity_or_coordinates): shape = connectivity_or_coordinates.shape @@ -446,9 +444,10 @@ def _check_connectivity(connectivity_or_coordinates): if __name__ == "__main__": - import geopandas, pandas - from libpysal import weights, examples + import geopandas import matplotlib.pyplot as plt + import pandas # noqa F811 + from libpysal import examples, weights # noqa F811 from matplotlib import cm current_cmap = cm.get_cmap() diff --git a/setup.cfg b/setup.cfg index ac51808d..d93df069 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,3 +5,16 @@ versionfile_source = esda/_version.py versionfile_build = esda/_version.py tag_prefix = v parentdir_prefix = esda- + +[flake8] +max_line_length = 88 +exclude = docs/conf.py, versioneer.py, esda/_version.py +ignore = + F403, + F405 + +[black] +line-length = 88 + +[isort] +profile = black \ No newline at end of file diff --git a/setup.py b/setup.py index 1181f32a..71c20ac1 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,9 @@ # coding: utf-8 -from setuptools import setup, find_packages -from distutils.command.build_py import build_py import os +from distutils.command.build_py import build_py + +from setuptools import find_packages, setup + import versioneer package = "esda" diff --git a/versioneer.py b/versioneer.py index 97130070..e283ecbf 100644 --- a/versioneer.py +++ b/versioneer.py @@ -1,4 +1,3 @@ - # Version: 0.20 """The Versioneer - like a rocketeer, but for versions. @@ -303,11 +302,13 @@ def get_root(): setup_py = os.path.join(root, "setup.py") versioneer_py = os.path.join(root, "versioneer.py") if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - err = ("Versioneer was unable to run the project root directory. " - "Versioneer requires setup.py to be executed from " - "its immediate directory (like 'python setup.py COMMAND'), " - "or in a way that lets it use sys.argv[0] to find the root " - "(like 'python path/to/setup.py COMMAND').") + err = ( + "Versioneer was unable to run the project root directory. " + "Versioneer requires setup.py to be executed from " + "its immediate directory (like 'python setup.py COMMAND'), " + "or in a way that lets it use sys.argv[0] to find the root " + "(like 'python path/to/setup.py COMMAND')." 
+ ) raise VersioneerBadRootError(err) try: # Certain runtime workflows (setup.py install/develop in a setuptools @@ -320,8 +321,10 @@ def get_root(): me_dir = os.path.normcase(os.path.splitext(my_path)[0]) vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) if me_dir != vsr_dir: - print("Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(my_path), versioneer_py)) + print( + "Warning: build in %s is using versioneer.py from %s" + % (os.path.dirname(my_path), versioneer_py) + ) except NameError: pass return root @@ -367,16 +370,17 @@ class NotThisMethod(Exception): def register_vcs_handler(vcs, method): # decorator """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): """Store f in HANDLERS[vcs][method].""" HANDLERS.setdefault(vcs, {})[method] = f return f + return decorate # pylint:disable=too-many-arguments,consider-using-with # noqa -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) process = None @@ -384,10 +388,13 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, try: dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + process = subprocess.Popen( + [command] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) break except EnvironmentError: e = sys.exc_info()[1] @@ -410,7 +417,9 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, return stdout, process.returncode -LONG_VERSION_PY['git'] = r''' +LONG_VERSION_PY[ + "git" +] = r''' # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build @@ -1091,7 +1100,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1100,7 +1109,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -1108,24 +1117,31 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): for ref in sorted(tags): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] + r = ref[len(tag_prefix) :] # Filter out refs that exactly match prefix or that don't start # with a number once the prefix is stripped (mostly a concern # when prefix is '') - if not re.match(r'\d', r): + if not re.match(r"\d", r): continue if verbose: print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } @register_vcs_handler("git", "pieces_from_vcs") @@ -1140,8 +1156,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -1149,10 +1164,19 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) + describe_out, rc = runner( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + "%s*" % tag_prefix, + ], + cwd=root, + ) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -1167,8 +1191,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) # --abbrev-ref was added in git-1.6.3 if rc != 0 or branch_name is None: raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") @@ -1208,17 +1231,16 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] + git_describe = git_describe[: git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? 
- pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out return pieces # tag @@ -1227,10 +1249,12 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] + pieces["closest-tag"] = full_tag[len(tag_prefix) :] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) @@ -1303,15 +1327,21 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @@ -1340,11 +1370,13 @@ def versions_from_file(filename): contents = f.read() except EnvironmentError: raise NotThisMethod("unable to read _version.py") - mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) + mo = re.search( + r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S + ) if not mo: - mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) + mo = re.search( + r"version_json = '''\r\n(.*)''' # END VERSION_JSON", contents, re.M | re.S + ) if not mo: raise NotThisMethod("no version_json in _version.py") return json.loads(mo.group(1)) @@ -1353,8 +1385,7 @@ def versions_from_file(filename): def write_to_version_file(filename, versions): """Write the given version number to the given _version.py file.""" os.unlink(filename) - contents = json.dumps(versions, sort_keys=True, - indent=1, separators=(",", ": ")) + contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": ")) with open(filename, "w") as f: f.write(SHORT_VERSION_PY % contents) @@ -1386,8 +1417,7 @@ def render_pep440(pieces): rendered += ".dirty" else: # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -1416,8 +1446,7 @@ def render_pep440_branch(pieces): rendered = "0" if pieces["branch"] != "master": rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -1560,11 +1589,13 @@ def render_git_describe_long(pieces): def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - 
"date": None} + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } if not style or style == "default": style = "pep440" # the default @@ -1588,9 +1619,13 @@ def render(pieces, style): else: raise ValueError("unknown style '%s'" % style) - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } class VersioneerBadRootError(Exception): @@ -1613,8 +1648,9 @@ def get_versions(verbose=False): handlers = HANDLERS.get(cfg.VCS) assert handlers, "unrecognized VCS '%s'" % cfg.VCS verbose = verbose or cfg.verbose - assert cfg.versionfile_source is not None, \ - "please set versioneer.versionfile_source" + assert ( + cfg.versionfile_source is not None + ), "please set versioneer.versionfile_source" assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" versionfile_abs = os.path.join(root, cfg.versionfile_source) @@ -1668,9 +1704,13 @@ def get_versions(verbose=False): if verbose: print("unable to compute version") - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, "error": "unable to compute version", - "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } def get_version(): @@ -1723,6 +1763,7 @@ def run(self): print(" date: %s" % vers.get("date")) if vers["error"]: print(" error: %s" % vers["error"]) + cmds["version"] = cmd_version # we override "build_py" in both distutils and setuptools @@ -1741,8 +1782,8 @@ def run(self): # setup.py egg_info -> ? # we override different "build_py" commands for both environments - if 'build_py' in cmds: - _build_py = cmds['build_py'] + if "build_py" in cmds: + _build_py = cmds["build_py"] elif "setuptools" in sys.modules: from setuptools.command.build_py import build_py as _build_py else: @@ -1757,14 +1798,14 @@ def run(self): # now locate _version.py in the new build/ directory and replace # it with an updated value if cfg.versionfile_build: - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) + target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) + cmds["build_py"] = cmd_build_py - if 'build_ext' in cmds: - _build_ext = cmds['build_ext'] + if "build_ext" in cmds: + _build_ext = cmds["build_ext"] elif "setuptools" in sys.modules: from setuptools.command.build_ext import build_ext as _build_ext else: @@ -1784,14 +1825,15 @@ def run(self): return # now locate _version.py in the new build/ directory and replace # it with an updated value - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) + target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) + cmds["build_ext"] = cmd_build_ext if "cx_Freeze" in sys.modules: # cx_freeze enabled? from cx_Freeze.dist import build_exe as _build_exe + # nczeczulin reports that py2exe won't like the pep440-style string # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. 
# setup(console=[{ @@ -1812,17 +1854,21 @@ def run(self): os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + cmds["build_exe"] = cmd_build_exe del cmds["build_py"] - if 'py2exe' in sys.modules: # py2exe enabled? + if "py2exe" in sys.modules: # py2exe enabled? from py2exe.distutils_buildexe import py2exe as _py2exe class cmd_py2exe(_py2exe): @@ -1838,18 +1884,22 @@ def run(self): os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + cmds["py2exe"] = cmd_py2exe # we override different "sdist" commands for both environments - if 'sdist' in cmds: - _sdist = cmds['sdist'] + if "sdist" in cmds: + _sdist = cmds["sdist"] elif "setuptools" in sys.modules: from setuptools.command.sdist import sdist as _sdist else: @@ -1874,8 +1924,10 @@ def make_release_tree(self, base_dir, files): # updated value target_versionfile = os.path.join(base_dir, cfg.versionfile_source) print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, - self._versioneer_generated_versions) + write_to_version_file( + target_versionfile, self._versioneer_generated_versions + ) + cmds["sdist"] = cmd_sdist return cmds @@ -1935,11 +1987,13 @@ def do_setup(): root = get_root() try: cfg = get_config_from_root(root) - except (EnvironmentError, configparser.NoSectionError, - configparser.NoOptionError) as e: + except ( + EnvironmentError, + configparser.NoSectionError, + configparser.NoOptionError, + ) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): - print("Adding sample versioneer config to setup.cfg", - file=sys.stderr) + print("Adding sample versioneer config to setup.cfg", file=sys.stderr) with open(os.path.join(root, "setup.cfg"), "a") as f: f.write(SAMPLE_CONFIG) print(CONFIG_ERROR, file=sys.stderr) @@ -1948,15 +2002,18 @@ def do_setup(): print(" creating %s" % cfg.versionfile_source) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), - "__init__.py") + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: with open(ipy, "r") as f: @@ -2004,8 +2061,10 @@ def do_setup(): else: print(" 'versioneer.py' already in MANIFEST.in") if cfg.versionfile_source not in simple_includes: - print(" appending versionfile_source 
('%s') to MANIFEST.in" % - cfg.versionfile_source) + print( + " appending versionfile_source ('%s') to MANIFEST.in" + % cfg.versionfile_source + ) with open(manifest_in, "a") as f: f.write("include %s\n" % cfg.versionfile_source) else: