From 4124b15dc538878ddaf4c35db5b804bda24b5e8c Mon Sep 17 00:00:00 2001 From: James Gaboardi Date: Sun, 20 Nov 2022 12:00:01 -0500 Subject: [PATCH 1/2] black/isort/flake8 repo --- docs/conf.py | 184 ++++---- esda/__init__.py | 45 +- esda/_version.py | 154 ++++--- esda/adbscan.py | 110 ++--- esda/crand.py | 66 ++- esda/gamma.py | 14 +- esda/geary.py | 51 ++- esda/geary_local.py | 27 +- esda/geary_local_mv.py | 8 +- esda/getisord.py | 206 +++++---- esda/join_counts.py | 61 ++- esda/join_counts_local.py | 55 +-- esda/join_counts_local_bv.py | 81 ++-- esda/join_counts_local_mv.py | 66 ++- esda/lee.py | 54 +-- esda/losh.py | 39 +- esda/map_comparison.py | 6 +- esda/mixture_smoothing.py | 137 ++++-- esda/moran.py | 478 +++++++++---------- esda/shape.py | 48 +- esda/silhouettes.py | 74 +-- esda/smaup.py | 3 +- esda/smoothing.py | 657 ++++++++++++++++----------- esda/tabular.py | 10 +- esda/tests/test_adbscan.py | 23 +- esda/tests/test_gamma.py | 28 +- esda/tests/test_geary.py | 5 +- esda/tests/test_getisord.py | 5 +- esda/tests/test_join_counts.py | 19 +- esda/tests/test_ljc.py | 21 +- esda/tests/test_ljc_bv.py | 33 +- esda/tests/test_ljc_mv.py | 27 +- esda/tests/test_local_geary.py | 16 +- esda/tests/test_local_geary_mv.py | 17 +- esda/tests/test_losh.py | 16 +- esda/tests/test_map_comparison.py | 15 +- esda/tests/test_mixture_smoothing.py | 33 +- esda/tests/test_moran.py | 69 ++- esda/tests/test_shape.py | 45 +- esda/tests/test_silhouette.py | 243 ++++++---- esda/tests/test_smaup.py | 41 +- esda/tests/test_smoothing.py | 533 +++++++++++++--------- esda/tests/test_topo.py | 16 +- esda/tests/test_util.py | 5 +- esda/topo.py | 91 ++-- setup.cfg | 13 + setup.py | 6 +- versioneer.py | 281 +++++++----- 48 files changed, 2375 insertions(+), 1860 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 79ce90f1..dd5329cc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,15 +16,15 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # -import sys, os -import sphinx_bootstrap_theme - +import os +import sys -sys.path.insert(0, os.path.abspath('../')) +import sphinx_bootstrap_theme -#import your package to obtain the version info to display on the docs website -import esda +sys.path.insert(0, os.path.abspath("../")) +# import your package to obtain the version info to display on the docs website +import esda # noqa E402 # -- General configuration ------------------------------------------------ @@ -34,43 +34,44 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = [#'sphinx_gallery.gen_gallery', - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.viewcode', - 'sphinxcontrib.bibtex', - 'sphinx.ext.mathjax', - 'sphinx.ext.doctest', - 'sphinx.ext.intersphinx', - 'numpydoc', - 'matplotlib.sphinxext.plot_directive', - 'nbsphinx'] +extensions = [ # 'sphinx_gallery.gen_gallery', + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.viewcode", + "sphinxcontrib.bibtex", + "sphinx.ext.mathjax", + "sphinx.ext.doctest", + "sphinx.ext.intersphinx", + "numpydoc", + "matplotlib.sphinxext.plot_directive", + "nbsphinx", +] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. 
# You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. project = "esda" # string of your project name, for example, 'giddy' -copyright = '2018, pysal developers' -author = 'pysal developers' +copyright = "2018, pysal developers" +author = "pysal developers" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The full version. -version = esda.__version__ #should replace it with your PACKAGE_NAME -release = esda.__version__ #should replace it with your PACKAGE_NAME +version = esda.__version__ # should replace it with your PACKAGE_NAME +release = esda.__version__ # should replace it with your PACKAGE_NAME # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -82,10 +83,10 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'tests/*'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "tests/*"] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False @@ -96,13 +97,14 @@ # a list of builtin themes. # # html_theme = 'alabaster' -html_theme = 'bootstrap' +html_theme = "bootstrap" html_theme_path = sphinx_bootstrap_theme.get_html_theme_path() html_title = "%s v%s Manual" % (project, version) -# (Optional) Logo of your package. Should be small enough to fit the navbar (ideally 24x24). +# (Optional) Logo of your package. +# Should be small enough to fit the navbar (ideally 24x24). # Path should be relative to the ``_static`` files directory. -#html_logo = "_static/images/package_logo.jpg" +# html_logo = "_static/images/package_logo.jpg" # (Optional) PySAL favicon html_favicon = "_static/images/pysal_favicon.ico" @@ -113,27 +115,20 @@ # documentation. # html_theme_options = { - # Navigation bar title. (Default: ``project`` value) - 'navbar_title': "esda", # string of your project name, for example, 'giddy' - + "navbar_title": "esda", # string of your project name, for example, 'giddy' # Render the next and previous page links in navbar. (Default: true) - 'navbar_sidebarrel': False, - + "navbar_sidebarrel": False, # Render the current pages TOC in the navbar. (Default: true) - #'navbar_pagenav': True, - #'navbar_pagenav': False, - + # 'navbar_pagenav': True, + # 'navbar_pagenav': False, # No sidebar - 'nosidebar': True, - + "nosidebar": True, # Tab name for the current pages TOC. (Default: "Page") - #'navbar_pagenav_name': "Page", - + # 'navbar_pagenav_name': "Page", # Global TOC depth for "site" navbar tab. (Default: 1) # Switching to -1 shows all levels. - 'globaltoc_depth': 2, - + "globaltoc_depth": 2, # Include hidden TOCs in Site navbar? # # Note: If this is "false", you cannot have mixed ``:hidden:`` and @@ -141,54 +136,46 @@ # will break. # # Values: "true" (default) or "false" - 'globaltoc_includehidden': "true", - + "globaltoc_includehidden": "true", # HTML navbar class (Default: "navbar") to attach to
element. # For black navbar, do "navbar navbar-inverse" - #'navbar_class': "navbar navbar-inverse", - + # 'navbar_class': "navbar navbar-inverse", # Fix navigation bar to top of page? # Values: "true" (default) or "false" - 'navbar_fixed_top': "true", - - + "navbar_fixed_top": "true", # Location of link to source. # Options are "nav" (default), "footer" or anything else to exclude. - 'source_link_position': 'footer', - + "source_link_position": "footer", # Bootswatch (http://bootswatch.com/) theme. # # Options are nothing (default) or the name of a valid theme # such as "amelia" or "cosmo", "yeti", "flatly". - 'bootswatch_theme': "yeti", - + "bootswatch_theme": "yeti", # Choose Bootstrap version. # Values: "3" (default) or "2" (in quotes) - 'bootstrap_version': "3", - + "bootstrap_version": "3", # Navigation bar menu - 'navbar_links': [ - ("Installation", "installation"), - ("Tutorial", "tutorial"), - ("API", "api"), - ("References", "references"), - ], - + "navbar_links": [ + ("Installation", "installation"), + ("Tutorial", "tutorial"), + ("API", "api"), + ("References", "references"), + ], } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # html_sidebars = {'sidebar': ['localtoc.html', 'sourcelink.html', 'searchbox.html']} # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. -htmlhelp_basename = 'esda'+'doc' +htmlhelp_basename = "esda" + "doc" # -- Options for LaTeX output --------------------------------------------- @@ -197,15 +184,12 @@ # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. # # 'preamble': '', - # Latex figure (float) alignment # # 'figure_align': 'htbp', @@ -215,8 +199,7 @@ # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'esda.tex', u'esda Documentation', - u'pysal developers', 'manual'), + (master_doc, "esda.tex", "esda Documentation", "pysal developers", "manual"), ] @@ -224,10 +207,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'esda', u'esda Documentation', - [author], 1) -] +man_pages = [(master_doc, "esda", "esda Documentation", [author], 1)] # -- Options for Texinfo output ------------------------------------------- @@ -236,9 +216,15 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'esda', u'esda Documentation', - author, 'esda developers', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "esda", + "esda Documentation", + author, + "esda developers", + "One line description of project.", + "Miscellaneous", + ), ] @@ -264,22 +250,34 @@ def setup(app): # Example configuration for intersphinx: refer to the Python standard library. 
intersphinx_mapping = { - 'python': ('https://docs.python.org/3/', None), - 'numpy': ('https://docs.scipy.org/doc/numpy', None), - 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), - 'geopandas': ('https://geopandas.readthedocs.io/en/latest/', None), - 'sklearn': ('https://scikit-learn.org/stable/', None), - 'giddy': ('https://giddy.readthedocs.io/en/latest/', None), - 'libpysal': ('https://pysal.org/libpysal/', None), - 'esda': ('https://esda.readthedocs.io/en/latest/', None), - 'region': ('https://region.readthedocs.io/en/latest/', None), - 'hdbscan': ('https://hdbscan.readthedocs.io/en/latest/', None) - + "python": ("https://docs.python.org/3/", None), + "numpy": ("https://docs.scipy.org/doc/numpy", None), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), + "geopandas": ("https://geopandas.readthedocs.io/en/latest/", None), + "sklearn": ("https://scikit-learn.org/stable/", None), + "giddy": ("https://giddy.readthedocs.io/en/latest/", None), + "libpysal": ("https://pysal.org/libpysal/", None), + "esda": ("https://esda.readthedocs.io/en/latest/", None), + "region": ("https://region.readthedocs.io/en/latest/", None), + "hdbscan": ("https://hdbscan.readthedocs.io/en/latest/", None), } -numpydoc_xref_ignore = {'type', 'optional', 'default', 'shape', 'fitted', 'instance', - 'cluster', 'of', 'or', 'if', 'using', 'otherwise', 'required', - 'from'} +numpydoc_xref_ignore = { + "type", + "optional", + "default", + "shape", + "fitted", + "instance", + "cluster", + "of", + "or", + "if", + "using", + "otherwise", + "required", + "from", +} # This is processed by Jinja2 and inserted before each notebook @@ -306,7 +304,7 @@ def setup(app): \nbsphinxstartnotebook{\scriptsize\noindent\strut \textcolor{gray}{The following section was generated from \sphinxcode{\sphinxupquote{\strut {{ docname | escape_latex }}}} \dotfill}} -""" +""" # noqa E501 # This is processed by Jinja2 and inserted after each notebook nbsphinx_epilog = r""" diff --git a/esda/__init__.py b/esda/__init__.py index f12d3e4f..802c0fc8 100644 --- a/esda/__init__.py +++ b/esda/__init__.py @@ -3,40 +3,37 @@ ================================================= """ -from . import adbscan -from .gamma import Gamma -from .geary import Geary -from .geary_local import Geary_Local -from .geary_local_mv import Geary_Local_MV -from .getisord import G, G_Local -from .join_counts import Join_Counts -from .join_counts_local import Join_Counts_Local -from .join_counts_local_bv import Join_Counts_Local_BV -from .join_counts_local_mv import Join_Counts_Local_MV -from .lee import Spatial_Pearson, Spatial_Pearson_Local -from .losh import LOSH -from .map_comparison import ( - external_entropy, +from . 
import _version, adbscan, shape # noqa F401 +from .gamma import Gamma # noqa F401 +from .geary import Geary # noqa F401 +from .geary_local import Geary_Local # noqa F401 +from .geary_local_mv import Geary_Local_MV # noqa F401 +from .getisord import G, G_Local # noqa F401 +from .join_counts import Join_Counts # noqa F401 +from .join_counts_local import Join_Counts_Local # noqa F401 +from .join_counts_local_bv import Join_Counts_Local_BV # noqa F401 +from .join_counts_local_mv import Join_Counts_Local_MV # noqa F401 +from .lee import Spatial_Pearson, Spatial_Pearson_Local # noqa F401 +from .losh import LOSH # noqa F401 +from .map_comparison import ( # noqa F401 + areal_entropy, completeness, + external_entropy, homogeneity, overlay_entropy, - areal_entropy, ) -from .moran import ( +from .moran import ( # noqa F401 Moran, Moran_BV, Moran_BV_matrix, Moran_Local, Moran_Local_BV, - Moran_Rate, Moran_Local_Rate, + Moran_Rate, ) -from .silhouettes import path_silhouette, boundary_silhouette -from . import shape -from .smaup import Smaup -from .topo import prominence, isolation -from .util import fdr - -from . import _version +from .silhouettes import boundary_silhouette, path_silhouette # noqa F401 +from .smaup import Smaup # noqa F401 +from .topo import isolation, prominence # noqa F401 +from .util import fdr # noqa F401 __version__ = _version.get_versions()["version"] diff --git a/esda/_version.py b/esda/_version.py index d94bdb6d..bc5c4cfe 100644 --- a/esda/_version.py +++ b/esda/_version.py @@ -1,4 +1,3 @@ - # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build @@ -58,18 +57,19 @@ class NotThisMethod(Exception): def register_vcs_handler(vcs, method): # decorator """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f + return decorate # pylint:disable=too-many-arguments,consider-using-with # noqa -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) process = None @@ -77,10 +77,13 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, try: dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + process = subprocess.Popen( + [command] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) break except EnvironmentError: e = sys.exc_info()[1] @@ -115,15 +118,21 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), 
parentdir_prefix)) + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @@ -182,7 +191,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -191,7 +200,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -199,24 +208,31 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] + r = ref[len(tag_prefix) :] # Filter out refs that exactly match prefix or that don't start # with a number once the prefix is stripped (mostly a concern # when prefix is '') - if not re.match(r'\d', r): + if not re.match(r"\d", r): continue if verbose: print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } @register_vcs_handler("git", "pieces_from_vcs") @@ -231,8 +247,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -240,10 +255,19 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) + describe_out, rc = runner( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + "%s*" % tag_prefix, + ], + cwd=root, + ) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -258,8 +282,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None - 
branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) # --abbrev-ref was added in git-1.6.3 if rc != 0 or branch_name is None: raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") @@ -299,17 +322,16 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] + git_describe = git_describe[: git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out return pieces # tag @@ -318,10 +340,12 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] + pieces["closest-tag"] = full_tag[len(tag_prefix) :] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) @@ -370,8 +394,7 @@ def render_pep440(pieces): rendered += ".dirty" else: # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -400,8 +423,7 @@ def render_pep440_branch(pieces): rendered = "0" if pieces["branch"] != "master": rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -544,11 +566,13 @@ def render_git_describe_long(pieces): def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } if not style or style == "default": style = "pep440" # the default @@ -572,9 +596,13 @@ def render(pieces, style): else: raise ValueError("unknown style '%s'" % style) - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } def get_versions(): @@ -588,8 +616,7 @@ def get_versions(): verbose = cfg.verbose try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass @@ -598,13 +625,16 @@ def get_versions(): # versionfile_source is the relative path from the top of the source # tree (where the .git directory might 
live) to this file. Invert # this to find the root from __file__. - for _ in cfg.versionfile_source.split('/'): + for _ in cfg.versionfile_source.split("/"): root = os.path.dirname(root) except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None, + } try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) @@ -618,6 +648,10 @@ def get_versions(): except NotThisMethod: pass - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } diff --git a/esda/adbscan.py b/esda/adbscan.py index a1a0a39f..dbb69e26 100644 --- a/esda/adbscan.py +++ b/esda/adbscan.py @@ -5,14 +5,16 @@ __author__ = "Dani Arribas-Bel " import warnings -import pandas +from collections import Counter + import numpy as np +import pandas from libpysal.cg.alpha_shapes import alpha_shape_auto from scipy.spatial import cKDTree -from collections import Counter +from sklearn.base import BaseEstimator as _BaseEstimator +from sklearn.base import ClusterMixin as _ClusterMixin from sklearn.cluster import DBSCAN from sklearn.neighbors import KNeighborsClassifier -from sklearn.base import BaseEstimator as _BaseEstimator, ClusterMixin as _ClusterMixin __all__ = ["ADBSCAN", "remap_lbls", "ensemble", "get_cluster_boundary"] @@ -32,52 +34,50 @@ class ADBSCAN(_ClusterMixin, _BaseEstimator): Parameters ---------- - eps : float - The maximum distance between two samples for them to be considered - as in the same neighborhood. - min_samples : int - The number of samples (or total weight) in a neighborhood - for a point to be considered as a core point. This includes the - point itself. - algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional - The algorithm to be used by the NearestNeighbors module - to compute pointwise distances and find nearest neighbors. - See NearestNeighbors module documentation for details. - n_jobs : int - [Optional. Default=1] The number of parallel jobs to run. If - -1, then the number of jobs is set to the number of CPU - cores. - pct_exact : float - [Optional. Default=0.1] Proportion of the entire dataset - used to calculate DBSCAN in each draw - reps : int - [Optional. Default=100] Number of random samples to draw in order to - build final solution - keep_solus : Boolean - [Optional. Default=False] If True, the `solus` and - `solus_relabelled` objects are kept, else it is deleted to - save memory - pct_thr : float - [Optional. Default=0.9] Minimum proportion of replications that a non-noise - label need to be assigned to an observation for that observation to be labelled - as such + eps : float + The maximum distance between two samples for them to be considered + as in the same neighborhood. + min_samples : int + The number of samples (or total weight) in a neighborhood + for a point to be considered as a core point. This includes the + point itself. + algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional + The algorithm to be used by the NearestNeighbors module + to compute pointwise distances and find nearest neighbors. + See NearestNeighbors module documentation for details. + n_jobs : int + [Optional. 
Default=1] The number of parallel jobs to run. If -1, then + the number of jobs is set to the number of CPU cores. + pct_exact : float + [Optional. Default=0.1] Proportion of the entire dataset + used to calculate DBSCAN in each draw + reps : int + [Optional. Default=100] Number of random samples to draw in order to + build final solution + keep_solus : bool + [Optional. Default=False] If True, the `solus` and `solus_relabelled` + objects are kept, else it is deleted to save memory + pct_thr : float + [Optional. Default=0.9] Minimum proportion of replications that + a non-noise label need to be assigned to an observation for that + observation to be labelled as such Attributes ---------- - labels_ : array - [Only available after `fit`] Cluster labels for each point in the - dataset given to fit(). - Noisy (if the proportion of the most common label is < pct_thr) - samples are given the label -1. - votes : DataFrame - [Only available after `fit`] Table indexed on `X.index` with - `labels_` under the `lbls` column, and the frequency across draws of - that label under `pct` - solus : DataFrame, shape = [n, reps] - [Only available after `fit`] Each solution of labels for every draw - solus_relabelled: DataFrame, shape = [n, reps] - [Only available after `fit`] Each solution of labels for - every draw, relabelled to be consistent across solutions + labels_ : array + [Only available after `fit`] Cluster labels for each point in the + dataset given to fit(). + Noisy (if the proportion of the most common label is < pct_thr) + samples are given the label -1. + votes : DataFrame + [Only available after `fit`] Table indexed on `X.index` with + `labels_` under the `lbls` column, and the frequency across draws of + that label under `pct` + solus : DataFrame, shape = [n, reps] + [Only available after `fit`] Each solution of labels for every draw + solus_relabelled : DataFrame, shape = [n, reps] + [Only available after `fit`] Each solution of labels for + every draw, relabelled to be consistent across solutions Examples -------- @@ -185,14 +185,11 @@ def fit(self, X, y=None, sample_weight=None, xy=["X", "Y"]): ) # Multi-core implementation of parallel draws if (self.n_jobs == -1) or (self.n_jobs > 1): - pool = _setup_pool(self.n_jobs) # Set different parallel seeds!!! - warn_msg = ( - "Multi-core implementation only works on " - "relabelling solutions. Execution of draws " - "is still sequential." + warnings.warn( + "Multi-core implementation only works on relabelling solutions. " + "Execution of draws is still sequential." 
) - warnings.warn(warn_msg) for i in range(self.reps): pars = ( n, @@ -416,8 +413,10 @@ def ensemble(solus_relabelled): 4 7 0.67 """ - f = lambda a: Counter(a).most_common(1)[0] - counts = np.array(list(map(f, solus_relabelled.values))) + + counts = np.array( + list(map(lambda a: Counter(a).most_common(1)[0], solus_relabelled.values)) + ) winner = counts[:, 0] votes = counts[:, 1].astype(int) / solus_relabelled.shape[1] pred = pandas.DataFrame( @@ -501,7 +500,8 @@ def get_cluster_boundary(labels, xys, xy=["X", "Y"], n_jobs=1, crs=None, step=1) >>> polys = get_cluster_boundary(labels, db) >>> polys[0].wkt 'POLYGON ((0.7217553174317995 0.8192869956700687, 0.7605307121989587 0.9086488808086682, 0.9177741225129434 0.8568503024577332, 0.8126209616521135 0.6262871483113925, 0.6125260668293881 0.5475861559192435, 0.5425443680112613 0.7546476915298572, 0.7217553174317995 0.8192869956700687))' - """ + """ # noqa E501 + try: from geopandas import GeoSeries except ModuleNotFoundError: diff --git a/esda/crand.py b/esda/crand.py index eeb7e615..d4413f4c 100644 --- a/esda/crand.py +++ b/esda/crand.py @@ -3,11 +3,12 @@ """ import os -import numpy as np import warnings +import numpy as np + try: - from numba import njit, jit, prange, boolean + from numba import boolean, jit, njit, prange except (ImportError, ModuleNotFoundError): def jit(*dec_args, **dec_kwargs): @@ -25,7 +26,6 @@ def intercepted_function(f, *f_args, **f_kwargs): prange = range boolean = bool -import tempfile __all__ = ["crand"] @@ -119,8 +119,8 @@ def crand( p_sim : ndarray (N,) array with pseudo p-values from conditional permutation rlocals : ndarray - If keep=True, (N, permutations) array with simulated values of stat_func under the - null of spatial randomness; else, empty (1, 1) array + If keep=True, (N, permutations) array with simulated values + of stat_func under the null of spatial randomness; else, empty (1, 1) array """ adj_matrix = w.sparse @@ -153,28 +153,28 @@ def crand( # we need to be careful to shuffle only *other* sites, not # the self-site. This means we need to - ### extract the self-weight, if any + # extract the self-weight, if any self_weights = adj_matrix.diagonal() - ### force the self-site weight to zero + # force the self-site weight to zero with warnings.catch_warnings(): # massive changes to sparsity incur a cost, but it's not # large for simply changing the diag warnings.simplefilter("ignore") adj_matrix.setdiag(0) adj_matrix.eliminate_zeros() - ### extract the weights from a now no-self-weighted adj_matrix + # extract the weights from a now no-self-weighted adj_matrix other_weights = adj_matrix.data - ### use the non-self weight as the cardinality, since - ### this is the set we have to randomize. - ### if there is a self-neighbor, we need to *not* shuffle the - ### self neighbor, since conditional randomization conditions on site i. + # use the non-self weight as the cardinality, since + # this is the set we have to randomize. + # if there is a self-neighbor, we need to *not* shuffle the + # self neighbor, since conditional randomization conditions on site i. 
cardinalities = np.array((adj_matrix != 0).sum(1)).flatten() max_card = cardinalities.max() permuted_ids = vec_permutations(max_card, n, permutations, seed) if n_jobs != 1: try: - import joblib + import joblib # noqa F401 except (ModuleNotFoundError, ImportError): warnings.warn( f"Parallel processing is requested (n_jobs={n_jobs})," @@ -288,9 +288,9 @@ def compute_chunk( scaling : float Scaling value to apply to every local statistic island_weight: - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. If 0, then - the lag is always zero for islands. + value to use as a weight for the "fake" neighbor for every island. + If numpy.nan, will propagate to the final local statistic depending + on the `stat_func`. If 0, then the lag is always zero for islands. Returns ------- @@ -315,27 +315,19 @@ def compute_chunk( for i in range(chunk_n): cardinality = cardinalities[i] if cardinality == 0: # deal with islands - effective_cardinality = 1 weights_i = np.zeros(2, dtype=other_weights.dtype) weights_i[1] = island_weight else: - effective_cardinality = cardinality # we need to fix the self-weight to the first position weights_i = np.zeros(cardinality + 1, dtype=other_weights.dtype) weights_i[0] = self_weights[i] - ### this chomps the next `cardinality` weights off of `weights` - weights_i[1:] = other_weights[wloc : (wloc + cardinality)] + # this chomps the next `cardinality` weights off of `weights` + weights_i[1:] = other_weights[wloc : (wloc + cardinality)] # noqa E203 wloc += cardinality - z_chunk_i = z_chunk[i] mask[chunk_start + i] = False - z_no_i = z[ - mask, - ] rstats = stat_func(chunk_start + i, z, permuted_ids, weights_i, scaling) if keep: - rlocals[ - i, - ] = rstats + rlocals[i] = rstats larger[i] = np.sum(rstats >= observed[i]) return larger, rlocals @@ -370,7 +362,7 @@ def build_weights_offsets(cardinalities: np.ndarray, n_chunks: int): chunk_size = np.int64(n / n_chunks) + 1 start = 0 for i in range(n_chunks): - advance = cardinalities[start : start + chunk_size].sum() + advance = cardinalities[start : start + chunk_size].sum() # noqa E203 boundary_points[i + 1] = boundary_points[i] + advance start += chunk_size return boundary_points @@ -432,11 +424,13 @@ def chunk_generator( chunk_size = starts[1] - starts[0] for i in range(n_jobs): start = starts[i] - z_chunk = z[start : (start + chunk_size)] - self_weights_chunk = self_weights[start : (start + chunk_size)] - observed_chunk = observed[start : (start + chunk_size)] - cardinalities_chunk = cardinalities[start : (start + chunk_size)] - w_chunk = other_weights[w_boundary_points[i] : w_boundary_points[i + 1]] + z_chunk = z[start : (start + chunk_size)] # noqa E203 + self_weights_chunk = self_weights[start : (start + chunk_size)] # noqa E203 + observed_chunk = observed[start : (start + chunk_size)] # noqa E203 + cardinalities_chunk = cardinalities[start : (start + chunk_size)] # noqa E203 + w_chunk = other_weights[ + w_boundary_points[i] : w_boundary_points[i + 1] # noqa E203 + ] yield ( start, z_chunk, @@ -507,9 +501,9 @@ def parallel_crand( scaling : float Scaling value to apply to every local statistic island_weight: - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. If 0, then - the lag is always zero for islands. + value to use as a weight for the "fake" neighbor for every island. 
+ If numpy.nan, will propagate to the final local statistic depending + on the `stat_func`. If 0, then the lag is always zero for islands. Returns ------- larger : ndarray diff --git a/esda/gamma.py b/esda/gamma.py index 7ca8342c..f6733b87 100644 --- a/esda/gamma.py +++ b/esda/gamma.py @@ -5,16 +5,14 @@ """ __author__ = "Luc Anselin Serge Rey " +import warnings + import numpy as np from libpysal.weights.spatial_lag import lag_spatial + +from .crand import _prepare_univariate +from .crand import njit as _njit from .tabular import _univariate_handler -import warnings -from .crand import ( - crand as _crand_plus, - njit as _njit, - _prepare_univariate, - _prepare_bivariate, -) __all__ = ["Gamma"] @@ -215,7 +213,7 @@ def __calc(self, z, op): g = (z * zl).sum() elif op == "s": # squared difference zs = np.zeros(z.shape) - z2 = z ** 2 + z2 = z**2 for i, i0 in enumerate(self.w.id_order): neighbors = self.w.neighbor_offsets[i0] wijs = self.w.weights[i0] diff --git a/esda/geary.py b/esda/geary.py index a8dda077..e1b07876 100644 --- a/esda/geary.py +++ b/esda/geary.py @@ -6,6 +6,7 @@ import numpy as np import scipy.stats as stats from libpysal import weights + from .tabular import _univariate_handler __all__ = ["Geary"] @@ -144,13 +145,13 @@ def __init__(self, y, w, transformation="r", permutations=999): self.p_sim = (larger + 1.0) / (permutations + 1.0) self.EC_sim = sum(sim) / permutations self.seC_sim = np.array(sim).std() - self.VC_sim = self.seC_sim ** 2 + self.VC_sim = self.seC_sim**2 self.z_sim = (self.C - self.EC_sim) / self.seC_sim self.p_z_sim = 1 - stats.norm.cdf(np.abs(self.z_sim)) @property def _statistic(self): - """ a standardized accessor for esda statistics""" + """a standardized accessor for esda statistics""" return self.C def __moments(self): @@ -162,8 +163,8 @@ def __moments(self): s2 = w.s2 s02 = s0 * s0 yd = y - y.mean() - yd4 = yd ** 4 - yd2 = yd ** 2 + yd4 = yd**4 + yd2 = yd**2 n2 = n * n k = (yd4.sum() / n) / ((yd2.sum() / n) ** 2) A = (n - 1) * s1 * (n2 - 3 * n + 3 - (n - 1) * k) @@ -191,27 +192,27 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - cols : string or list of string - name or list of names of columns to use to compute the statistic - w : pysal weights object - a weights object aligned with the dataframe. If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, with default configurations, - the derived columns will be named like 'column_geary' and 'column_p_sim' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the Geary statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - Geary statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the Geary statistic. + df : pandas.DataFrame + a pandas dataframe with a geometry column + cols : string or list of string + name or list of names of columns to use to compute the statistic + w : pysal weights object + a weights object aligned with the dataframe. If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe inplace or to + return a series contaning the results of the computation. 
If + operating inplace, with default configurations, + the derived columns will be named like 'column_geary' and 'column_p_sim' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the Geary statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the + Geary statistic + **stat_kws : dict + options to pass to the underlying statistic. For this, see the + documentation for the Geary statistic. Returns -------- diff --git a/esda/geary_local.py b/esda/geary_local.py index 64e03024..4d432d4f 100644 --- a/esda/geary_local.py +++ b/esda/geary_local.py @@ -1,11 +1,10 @@ import numpy as np import pandas as pd -import warnings -from scipy import sparse -from scipy import stats from sklearn.base import BaseEstimator -import libpysal as lp -from esda.crand import crand as _crand_plus, njit as _njit, _prepare_univariate + +from esda.crand import _prepare_univariate +from esda.crand import crand as _crand_plus +from esda.crand import njit as _njit class Geary_Local(BaseEstimator): @@ -63,10 +62,10 @@ def __init__( of the function, since numba does not correctly interpret external seeds nor numpy.random.RandomState instances. - island_weight: - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. If 0, then - the lag is always zero for islands. + island_weight : + value to use as a weight for the "fake" neighbor for every island. + If numpy.nan, will propagate to the final local statistic depending + on the `stat_func`. If 0, thenthe lag is always zero for islands. Attributes ---------- @@ -126,7 +125,6 @@ def fit(self, x): sig = self.sig keep_simulations = self.keep_simulations n_jobs = self.n_jobs - seed = self.seed self.localG = self._statistic(x, w) @@ -148,11 +146,13 @@ def fit(self, x): # Create empty vector to fill self.labs = np.empty(len(x)) * np.nan # Outliers - self.labs[(self.localG < Eij_mean) & (x > x_mean) & (self.p_sim <= sig)] = 1 + locg_lt_eij = self.localG < Eij_mean + p_leq_sig = self.p_sim <= sig + self.labs[locg_lt_eij & (x > x_mean) & p_leq_sig] = 1 # Clusters - self.labs[(self.localG < Eij_mean) & (x < x_mean) & (self.p_sim <= sig)] = 2 + self.labs[locg_lt_eij & (x < x_mean) & p_leq_sig] = 2 # Other - self.labs[(self.localG > Eij_mean) & (self.p_sim <= sig)] = 3 + self.labs[(self.localG > Eij_mean) & p_leq_sig] = 3 # Non-significant self.labs[self.p_sim > sig] = 4 @@ -191,7 +191,6 @@ def _statistic(x, w): @_njit(fastmath=True) def _local_geary(i, z, permuted_ids, weights_i, scaling): - self_weight = weights_i[0] other_weights = weights_i[1:] zi, zrand = _prepare_univariate(i, z, permuted_ids, other_weights) return (zi - zrand) ** 2 @ other_weights diff --git a/esda/geary_local_mv.py b/esda/geary_local_mv.py index 16f4ceaf..f9dbccc6 100644 --- a/esda/geary_local_mv.py +++ b/esda/geary_local_mv.py @@ -157,15 +157,13 @@ def _crand(self, zvariables): np.random.shuffle(idsi) vars_rand = [] for j in range(nvars): - vars_rand.append(zvariables[j][idsi[rids[:, 0 : wc[i]]]]) + vars_rand.append(zvariables[j][idsi[rids[:, 0 : wc[i]]]]) # noqa E203 # vars rand as tmp # Calculate diff diff = [] for z in range(nvars): - diff.append( - (np.array((zvariables[z][i] - vars_rand[z]) ** 2 * w[i])).sum(1) - / nvars - ) + _diff = (np.array((zvariables[z][i] - vars_rand[z]) ** 2 * w[i])) + diff.append(_diff.sum(1) / nvars) # add up differences temp = np.array([sum(x) for x in zip(*diff)]) # 
Assign to object to be returned diff --git a/esda/getisord.py b/esda/getisord.py index c87231d7..7390f976 100644 --- a/esda/getisord.py +++ b/esda/getisord.py @@ -5,11 +5,15 @@ __all__ = ["G", "G_Local"] import warnings + from libpysal.common import np, stats from libpysal.weights.spatial_lag import lag_spatial as slag from libpysal.weights.util import fill_diagonal + +from .crand import _prepare_univariate +from .crand import crand as _crand_plus +from .crand import njit as _njit from .tabular import _univariate_handler -from .crand import njit as _njit, crand as _crand_plus, _prepare_univariate PERMUTATIONS = 999 @@ -29,40 +33,40 @@ class G(object): Attributes ---------- - y : array - original variable - w : W - DistanceBand W spatial weights based on distance band - permutation : int - the number of permutations - G : float - the value of statistic - EG : float - the expected value of statistic - VG : float - the variance of G under normality assumption - z_norm : float - standard normal test statistic - p_norm : float - p-value under normality assumption (one-sided) - sim : array - (if permutations > 0) - vector of G values for permutated samples - p_sim : float - p-value based on permutations (one-sided) - null: spatial randomness - alternative: the observed G is extreme it is either extremely high or extremely low - EG_sim : float - average value of G from permutations - VG_sim : float - variance of G from permutations - seG_sim : float - standard deviation of G under permutations. - z_sim : float - standardized G based on permutations - p_z_sim : float - p-value based on standard normal approximation from - permutations (one-sided) + y : array + original variable + w : W + DistanceBand W spatial weights based on distance band + permutation : int + the number of permutations + G : float + the value of statistic + EG : float + the expected value of statistic + VG : float + the variance of G under normality assumption + z_norm : float + standard normal test statistic + p_norm : float + p-value under normality assumption (one-sided) + sim : array + (if permutations > 0) + vector of G values for permutated samples + p_sim : float + p-value based on permutations (one-sided) + null: spatial randomness + alternative: the observed G is extreme it is either + extremely high or extremely low + EG_sim : float + average value of G from permutations + VG_sim : float + variance of G from permutations + seG_sim : float + standard deviation of G under permutations. 
+ z_sim : float + standardized G based on permutations + p_z_sim : float + p-value based on standard normal approximation from permutations (one-sided) Notes ----- @@ -134,7 +138,7 @@ def __init__(self, y, w, permutations=PERMUTATIONS): self.p_sim = (larger + 1.0) / (permutations + 1.0) self.EG_sim = sum(sim) / permutations self.seG_sim = sim.std() - self.VG_sim = self.seG_sim ** 2 + self.VG_sim = self.seG_sim**2 self.z_sim = (self.G - self.EG_sim) / self.seG_sim self.p_z_sim = 1.0 - stats.norm.cdf(np.abs(self.z_sim)) @@ -166,7 +170,7 @@ def __moments(self): EG2NUM = EG2 EG2DEN = ((sum(y) ** 2 - sum(y2)) ** 2) * n * (n - 1) * (n - 2) * (n - 3) self.EG2 = EG2NUM / EG2DEN - self.VG = self.EG2 - self.EG ** 2 + self.VG = self.EG2 - self.EG**2 def __calc(self, y): yl = slag(self.w, y) @@ -175,7 +179,7 @@ def __calc(self, y): @property def _statistic(self): - """ Standardized accessor for esda statistics""" + """Standardized accessor for esda statistics""" return self.G @classmethod @@ -187,31 +191,31 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - cols : string or list of string - name or list of names of columns to use to compute the statistic - w : pysal weights object - a weights object aligned with the dataframe. If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, the derived columns will be named 'column_g' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the G statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - G statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the G statistic. + df : pandas.DataFrame + a pandas dataframe with a geometry column + cols : string or list of string + name or list of names of columns to use to compute the statistic + w : pysal weights object + a weights object aligned with the dataframe. If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe inplace or to + return a series contaning the results of the computation. If + operating inplace, the derived columns will be named 'column_g' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the G statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the G statistic + **stat_kws : dict + options to pass to the underlying statistic. For this, see the + documentation for the G statistic. Returns -------- - If inplace, None, and operation is conducted on dataframe in memory. Otherwise, - returns a copy of the dataframe with the relevant columns attached. + If inplace, None, and operation is conducted on dataframe + in memory. Otherwise, returns a copy of the dataframe with + the relevant columns attached. """ return _univariate_handler( @@ -250,9 +254,9 @@ class G_Local(object): provided in binary form, and standardization/self-weighting will be handled by the function itself. island_weight: - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. 
If 0, then - the lag is always zero for islands. + value to use as a weight for the "fake" neighbor for every island. + If numpy.nan, will propagate to the final local statistic depending + on the `stat_func`. If 0, then the lag is always zero for islands. Attributes ---------- @@ -279,9 +283,10 @@ class G_Local(object): of arrays of floats (if permutations>0), vector of I values for permutated samples p_sim : array - of floats, p-value based on permutations (one-sided) - null - spatial randomness - alternative - the observed G is extreme it is either extremely high or extremely low + of floats, p-value based on permutations (one-sided) + null - spatial randomness + alternative - the observed G is extreme + (it is either extremely high or extremely low) EG_sim : array of floats, average value of G from permutations VG_sim : array @@ -440,7 +445,6 @@ def __crand(self, keep_simulations): k = self.w.max_neighbors + 1 rids = np.array([np.random.permutation(rid)[0:k] for i in prange]) ids = np.arange(self.w.n) - ido = self.w.id_order wc = self.__getCardinalities() if self.w_transform == "r": den = np.array(wc) + self.star @@ -486,19 +490,18 @@ def calc(self): empirical_mean = (y.sum() - y * remove_self) / N # variance looks complex, yes, but it obtains from E[x^2] - E[x]^2. # So, break it down to allow subtraction of the self-neighbor. - mean_of_squares = ((y ** 2).sum() - (y ** 2) * remove_self) / N - empirical_variance = mean_of_squares - empirical_mean ** 2 + mean_of_squares = ((y**2).sum() - (y**2) * remove_self) / N + empirical_variance = mean_of_squares - empirical_mean**2 # Since we have corrected the diagonal, this should work cardinality = np.asarray(W.sum(axis=1)).squeeze() expected_value = cardinality / N - expected_variance = ( - cardinality - * (N - cardinality) - / (N - 1) - * (1 / N ** 2) - * (empirical_variance / (empirical_mean ** 2)) - ) + + expected_variance = cardinality * (N - cardinality) + expected_variance /= (N - 1) + expected_variance *= (1 / N**2) + expected_variance *= (empirical_variance / (empirical_mean**2)) + z_scores = (statistic - expected_value) / np.sqrt(expected_variance) self.Gs = statistic @@ -520,26 +523,26 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - cols : string or list of string - name or list of names of columns to use to compute the statistic - w : pysal weights object - a weights object aligned with the dataframe. If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, the derived columns will be named 'column_g_local' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the G_Local statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - G_Local statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the G_Local statistic. + df : pandas.DataFrame + a pandas dataframe with a geometry column + cols : string or list of string + name or list of names of columns to use to compute the statistic + w : pysal weights object + a weights object aligned with the dataframe. 
If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe inplace or to + return a series contaning the results of the computation. If + operating inplace, the derived columns will be named 'column_g_local' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the G_Local statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the + G_Local statistic + **stat_kws : dict + options to pass to the underlying statistic. For this, see the + documentation for the G_Local statistic. Returns ------- @@ -608,7 +611,7 @@ def _infer_star_and_structure_w(weights, star, transform): else: # star was something else, so try to fill the weights with it try: weights = fill_diagonal(weights, star) - except: + except TypeError: raise TypeError( f"Type of star ({type(star)}) not understood." f" Must be an integer, boolean, float, or numpy.ndarray." @@ -640,9 +643,12 @@ def _g_local_star_crand(i, z, permuted_ids, weights_i, scaling): if __name__ == "__main__": - import geopandas, numpy, esda, importlib - import matplotlib.pyplot as plt - from libpysal import weights, examples + + import geopandas + import numpy + from libpysal import examples, weights + + import esda df = geopandas.read_file(examples.get_path("NAT.shp")) diff --git a/esda/join_counts.py b/esda/join_counts.py index 7d46c4d4..5d5d888e 100644 --- a/esda/join_counts.py +++ b/esda/join_counts.py @@ -4,18 +4,12 @@ """ __author__ = "Sergio J. Rey , Luc Anselin " -from libpysal.weights.spatial_lag import lag_spatial -from .tabular import _univariate_handler -from scipy.stats import chi2_contingency -from scipy.stats import chi2 import numpy as np import pandas as pd -from .crand import ( - crand as _crand_plus, - njit as _njit, - _prepare_univariate, - _prepare_bivariate, -) +from scipy.stats import chi2, chi2_contingency + +from .crand import njit as _njit +from .tabular import _univariate_handler __all__ = ["Join_Counts"] @@ -258,32 +252,33 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - cols : string or list of string - name or list of names of columns to use to compute the statistic - w : pysal weights object - a weights object aligned with the dataframe. If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, the derived columns will be named - 'column_join_count' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the Join_Count statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - Join_Count statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the Join_Count statistic. + df : pandas.DataFrame + a pandas dataframe with a geometry column + cols : string or list of string + name or list of names of columns to use to compute the statistic + w : pysal weights object + a weights object aligned with the dataframe. If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe inplace or to + return a series contaning the results of the computation. 
If + operating inplace, the derived columns will be named + 'column_join_count' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the Join_Count statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the + Join_Count statistic + **stat_k : dict + options to pass to the underlying statistic. For this, see the + documentation for the Join_Count statistic. Returns -------- - If inplace, None, and operation is conducted on dataframe in memory. Otherwise, - returns a copy of the dataframe with the relevant columns attached. + If inplace, None, and operation is conducted on + dataframe in memory. Otherwise, returns a copy of the + dataframe with the relevant columns attached. """ if outvals is None: diff --git a/esda/join_counts_local.py b/esda/join_counts_local.py index 379be4f1..88321c1f 100644 --- a/esda/join_counts_local.py +++ b/esda/join_counts_local.py @@ -1,9 +1,11 @@ import numpy as np import pandas as pd -from sklearn.base import BaseEstimator from libpysal import weights -from esda.crand import crand as _crand_plus, njit as _njit, _prepare_univariate +from sklearn.base import BaseEstimator +from esda.crand import _prepare_univariate +from esda.crand import crand as _crand_plus +from esda.crand import njit as _njit PERMUTATIONS = 999 @@ -26,30 +28,29 @@ def __init__( Parameters ---------- - connectivity : scipy.sparse matrix object - the connectivity structure describing - the relationships between observed units. - Need not be row-standardized. - permutations : int - number of random permutations for calculation of pseudo - p_values - n_jobs : int - Number of cores to be used in the conditional randomisation. If -1, - all available cores are used. - keep_simulations : Boolean - (default=True) - If True, the entire matrix of replications under the null - is stored in memory and accessible; otherwise, replications - are not saved - seed : None/int - Seed to ensure reproducibility of conditional randomizations. - Must be set here, and not outside of the function, since numba - does not correctly interpret external seeds - nor numpy.random.RandomState instances. + connectivity : scipy.sparse matrix object + the connectivity structure describing + the relationships between observed units. + Need not be row-standardized. + permutations : int + number of random permutations for calculation of pseudo + p_values + n_jobs : int + Number of cores to be used in the conditional randomisation. If -1, + all available cores are used. + keep_simulations : bool (default True) + If True, the entire matrix of replications under the null + is stored in memory and accessible; otherwise, replications + are not saved + seed : None/int + Seed to ensure reproducibility of conditional randomizations. + Must be set here, and not outside of the function, since numba + does not correctly interpret external seeds + nor numpy.random.RandomState instances. island_weight: - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. If 0, then - the lag is always zero for islands. + value to use as a weight for the "fake" neighbor for every island. + If numpy.nan, will propagate to the final local statistic depending + on the `stat_func`. If 0, then the lag is always zero for islands. 
Attributes ---------- @@ -116,7 +117,7 @@ def fit(self, y, n_jobs=1, permutations=999): keep_simulations = self.keep_simulations n_jobs = self.n_jobs - seed = self.seed + # seed = self.seed self.y = y self.n = len(y) @@ -167,7 +168,7 @@ def _statistic(y, w): @_njit(fastmath=True) def _ljc_uni(i, z, permuted_ids, weights_i, scaling): - self_weight = weights_i[0] + # self_weight = weights_i[0] other_weights = weights_i[1:] zi, zrand = _prepare_univariate(i, z, permuted_ids, other_weights) return zi * (zrand @ other_weights) diff --git a/esda/join_counts_local_bv.py b/esda/join_counts_local_bv.py index f4ccb113..2da6c9a2 100644 --- a/esda/join_counts_local_bv.py +++ b/esda/join_counts_local_bv.py @@ -1,16 +1,11 @@ import numpy as np import pandas as pd -import warnings -from scipy import sparse -from sklearn.base import BaseEstimator from libpysal import weights -from esda.crand import ( - crand as _crand_plus, - njit as _njit, - _prepare_univariate, - _prepare_bivariate, -) +from sklearn.base import BaseEstimator +from esda.crand import _prepare_bivariate, _prepare_univariate +from esda.crand import crand as _crand_plus +from esda.crand import njit as _njit PERMUTATIONS = 999 @@ -33,30 +28,28 @@ def __init__( Parameters ---------- - connectivity : scipy.sparse matrix object - the connectivity structure describing - the relationships between observed units. - Need not be row-standardized. - permutations : int - number of random permutations for calculation of pseudo - p_values - n_jobs : int - Number of cores to be used in the conditional randomisation. If -1, - all available cores are used. - keep_simulations : Boolean - (default=True) - If True, the entire matrix of replications under the null - is stored in memory and accessible; otherwise, replications - are not saved - seed : None/int - Seed to ensure reproducibility of conditional randomizations. - Must be set here, and not outside of the function, since numba - does not correctly interpret external seeds - nor numpy.random.RandomState instances. + connectivity : scipy.sparse matrix object + the connectivity structure describing + the relationships between observed units. + Need not be row-standardized. + permutations : int + number of random permutations for calculation of pseudo p_values + n_jobs : int + Number of cores to be used in the conditional randomisation. + If -1, all available cores are used. + keep_simulations : bool (default True) + If True, the entire matrix of replications under the null + is stored in memory and accessible; otherwise, replications + are not saved + seed : None/int + Seed to ensure reproducibility of conditional randomizations. + Must be set here, and not outside of the function, since numba + does not correctly interpret external seeds + nor numpy.random.RandomState instances. island_weight: - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. If 0, then - the lag is always zero for islands. + value to use as a weight for the "fake" neighbor for every island. + If numpy.nan, will propagate to the final local statistic depending + on the `stat_func`. If 0, then the lag is always zero for islands. 
""" self.connectivity = connectivity @@ -99,9 +92,14 @@ def fit(self, x, z, case="CLC", n_jobs=1, permutations=999): Commpop data replicating GeoDa tutorial (Case 1) >>> import libpysal >>> import geopandas as gpd - >>> commpop = gpd.read_file("https://github.com/jeffcsauer/GSOC2020/raw/master/validation/data/commpop.gpkg") + >>> commpop = gpd.read_file( + ... "https://github.com/jeffcsauer/GSOC2020/raw/master/" + ... "validation/data/commpop.gpkg" + ... ) >>> w = libpysal.weights.Queen.from_dataframe(commpop) - >>> LJC_BV_Case1 = Local_Join_Counts_BV(connectivity=w).fit(commpop['popneg'], commpop['popplus'], case='BJC') + >>> LJC_BV_Case1 = Local_Join_Counts_BV( + ... connectivity=w + ... ).fit(commpop['popneg'], commpop['popplus'], case='BJC') >>> LJC_BV_Case1.LJC >>> LJC_BV_Case1.p_sim @@ -115,12 +113,13 @@ def fit(self, x, z, case="CLC", n_jobs=1, permutations=999): >>> guerry_ds.loc[(guerry_ds['Infants'] > 23574), 'infq5'] = 1 >>> guerry_ds.loc[(guerry_ds['Donatns'] > 10973), 'donq5'] = 1 >>> w = libpysal.weights.Queen.from_dataframe(guerry_ds) - >>> LJC_BV_Case2 = Local_Join_Counts_BV(connectivity=w).fit(guerry_ds['infq5'], guerry_ds['donq5'], case='CLC') + >>> LJC_BV_Case2 = Local_Join_Counts_BV( + ... connectivity=w + ... ).fit(guerry_ds['infq5'], guerry_ds['donq5'], case='CLC') >>> LJC_BV_Case2.LJC >>> LJC_BV_Case2.p_sim """ - # Need to ensure that the np.array() are of - # dtype='float' for numba + # Need to ensure that the np.array() are of dtype='float' for numba x = np.array(x, dtype="float") z = np.array(z, dtype="float") @@ -135,9 +134,7 @@ def fit(self, x, z, case="CLC", n_jobs=1, permutations=999): self.w = w self.case = case - keep_simulations = self.keep_simulations n_jobs = self.n_jobs - seed = self.seed self.LJC = self._statistic(x, z, w, case=case) @@ -182,8 +179,7 @@ def _statistic(x, z, w, case): # different from the esda.Join_Counts() function. 
adj_list = w.to_adjlist(remove_symmetric=False) - # First, set up a series that maps the values - # to the weights table + # First, set up a series that maps the values to the weights table zseries_x = pd.Series(x, index=w.id_order) zseries_z = pd.Series(z, index=w.id_order) @@ -233,7 +229,6 @@ def _statistic(x, z, w, case): def _ljc_bv_case1(i, z, permuted_ids, weights_i, scaling): zx = z[:, 0] zy = z[:, 1] - self_weight = weights_i[0] other_weights = weights_i[1:] zyi, zyrand = _prepare_univariate(i, zy, permuted_ids, other_weights) return zx[i] * (zyrand @ other_weights) @@ -241,9 +236,7 @@ def _ljc_bv_case1(i, z, permuted_ids, weights_i, scaling): @_njit(fastmath=True) def _ljc_bv_case2(i, z, permuted_ids, weights_i, scaling): - zx = z[:, 0] zy = z[:, 1] - self_weight = weights_i[0] other_weights = weights_i[1:] zxi, zxrand, zyi, zyrand = _prepare_bivariate(i, z, permuted_ids, other_weights) zf = zxrand * zyrand diff --git a/esda/join_counts_local_mv.py b/esda/join_counts_local_mv.py index 49e8fec6..0be44d98 100644 --- a/esda/join_counts_local_mv.py +++ b/esda/join_counts_local_mv.py @@ -1,18 +1,17 @@ import numpy as np import pandas as pd -from scipy import sparse -from sklearn.base import BaseEstimator from libpysal import weights -from esda.crand import crand as _crand_plus, njit as _njit, _prepare_univariate +from sklearn.base import BaseEstimator +from esda.crand import _prepare_univariate +from esda.crand import crand as _crand_plus +from esda.crand import njit as _njit PERMUTATIONS = 999 class Join_Counts_Local_MV(BaseEstimator): - """Multivariate Local Join Count Statistic""" - def __init__( self, connectivity=None, @@ -27,30 +26,30 @@ def __init__( Parameters ---------- - connectivity : scipy.sparse matrix object - the connectivity structure describing - the relationships between observed units. - Need not be row-standardized. - permutations : int - number of random permutations for calculation of pseudo - p_values - n_jobs : int - Number of cores to be used in the conditional randomisation. If -1, - all available cores are used. - keep_simulations : Boolean - (default=True) - If True, the entire matrix of replications under the null - is stored in memory and accessible; otherwise, replications - are not saved - seed : None/int - Seed to ensure reproducibility of conditional randomizations. - Must be set here, and not outside of the function, since numba - does not correctly interpret external seeds - nor numpy.random.RandomState instances. + connectivity : scipy.sparse matrix object + the connectivity structure describing + the relationships between observed units. + Need not be row-standardized. + permutations : int + number of random permutations for calculation of pseudo + p_values + n_jobs : int + Number of cores to be used in the conditional randomisation. If -1, + all available cores are used. + keep_simulations : boll + (default=True) + If True, the entire matrix of replications under the null + is stored in memory and accessible; otherwise, replications + are not saved + seed : None/int + Seed to ensure reproducibility of conditional randomizations. + Must be set here, and not outside of the function, since numba + does not correctly interpret external seeds + nor numpy.random.RandomState instances. island_weight: - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. If 0, then - the lag is always zero for islands. 
+ value to use as a weight for the "fake" neighbor for every island. + If numpy.nan, will propagate to the final local statistic depending + on the `stat_func`. If 0, then the lag is always zero for islands. """ @@ -99,7 +98,9 @@ def fit(self, variables, n_jobs=1, permutations=999): >>> guerry_ds.loc[(guerry_ds['Donatns'] > 10973), 'donq5'] = 1 >>> guerry_ds.loc[(guerry_ds['Suicids'] > 55564), 'suic5'] = 1 >>> w = libpysal.weights.Queen.from_dataframe(guerry_ds) - >>> LJC_MV = Local_Join_Counts_MV(connectivity=w).fit([guerry_ds['infq5'], guerry_ds['donq5'], guerry_ds['suic5']]) + >>> LJC_MV = Local_Join_Counts_MV( + ... connectivity=w + ... ).fit([guerry_ds['infq5'], guerry_ds['donq5'], guerry_ds['suic5']]) >>> LJC_MV.LJC >>> LJC_MV.p_sim """ @@ -114,10 +115,6 @@ def fit(self, variables, n_jobs=1, permutations=999): self.variables = np.array(variables, dtype="float") - keep_simulations = self.keep_simulations - n_jobs = self.n_jobs - seed = self.seed - # Need to ensure that the product is an # np.array() of dtype='float' for numba self.ext = np.array(np.prod(np.vstack(variables), axis=0), dtype="float") @@ -159,7 +156,7 @@ def _statistic(variables, w): # focal and neighbor values == 1 focal_all = np.array(np.all(np.dstack(focal) == 1, axis=2)) neighbor_all = np.array(np.all(np.dstack(neighbor) == 1, axis=2)) - MCLC = (focal_all == True) & (neighbor_all == True) + MCLC = (focal_all) & (neighbor_all) # Convert list of True/False to boolean array # and unlist (necessary for building pd.DF) MCLC = list(MCLC * 1) @@ -183,7 +180,6 @@ def _statistic(variables, w): @_njit(fastmath=True) def _ljc_mv(i, z, permuted_ids, weights_i, scaling): - self_weight = weights_i[0] other_weights = weights_i[1:] zi, zrand = _prepare_univariate(i, z, permuted_ids, other_weights) return zi * (zrand @ other_weights) diff --git a/esda/lee.py b/esda/lee.py index 8426aa44..d2aeb480 100644 --- a/esda/lee.py +++ b/esda/lee.py @@ -1,14 +1,10 @@ import numpy from scipy import sparse +from sklearn import preprocessing, utils from sklearn.base import BaseEstimator -from sklearn import preprocessing -from sklearn import utils -from .crand import ( - crand as _crand_plus, - njit as _njit, - _prepare_univariate, - _prepare_bivariate, -) + +from .crand import _prepare_bivariate +from .crand import njit as _njit class Spatial_Pearson(BaseEstimator): @@ -49,7 +45,7 @@ def fit(self, x, y): """ bivariate spatial pearson's R based on Eq. 18 of :cite:`Lee2001`. - L = \dfrac{Z^T (V^TV) Z}{1^T (V^TV) 1} + L = \\dfrac{Z^T (V^TV) Z}{1^T (V^TV) 1} Parameters ---------- @@ -79,10 +75,6 @@ def fit(self, x, y): self.connectivity = sparse.eye(Z.shape[0]) self.association_ = self._statistic(Z, self.connectivity) - standard_connectivity = sparse.csc_matrix( - self.connectivity / self.connectivity.sum(axis=1) - ) - if self.permutations is None: return self elif self.permutations < 1: @@ -154,22 +146,24 @@ def fit(self, x, y): bivariate local pearson's R based on Eq. 22 in Lee (2001), using site-wise conditional randomization from Moran_Local_BV. - L_i = \dfrac{ - n \cdot - \Big[\big(\sum_i w_{ij}(x_j - \bar{x})\big) - \big(\sum_i w_{ij}(y_j - \bar{y})\big) \Big] - } - { - \sqrt{\sum_i (x_i - \bar{x})^2} - \sqrt{\sum_i (y_i - \bar{y})^2}} - = \dfrac{ - n \cdot - (\tilde{x}_j - \bar{x}) - (\tilde{y}_j - \bar{y}) - } - { - \sqrt{\sum_i (x_i - \bar{x})^2} - \sqrt{\sum_i (y_i - \bar{y})^2}} + .. 
math:: + + L_i = \\dfrac{ + n \\cdot + \\Big[\big(\\sum_i w_{ij}(x_j - \bar{x})\big) + \big(\\sum_i w_{ij}(y_j - \bar{y})\big) \\Big] + } + { + \\sqrt{\\sum_i (x_i - \bar{x})^2} + \\sqrt{\\sum_i (y_i - \bar{y})^2}} + = \\dfrac{ + n \\cdot + (\tilde{x}_j - \bar{x}) + (\tilde{y}_j - \bar{y}) + } + { + \\sqrt{\\sum_i (x_i - \bar{x})^2} + \\sqrt{\\sum_i (y_i - \bar{y})^2}} Lee, Sang Il. (2001), "Developing a bivariate spatial association measure: An integration of Pearson's r and @@ -207,7 +201,7 @@ def fit(self, x, y): max_neighbors = (standard_connectivity != 0).sum(axis=1).max() random_ids = numpy.array( [ - numpy.random.permutation(n - 1)[0 : max_neighbors + 1] + numpy.random.permutation(n - 1)[0 : max_neighbors + 1] # noqa 208 for i in range(self.permutations) ] ) diff --git a/esda/losh.py b/esda/losh.py index 1087d040..181d5bce 100644 --- a/esda/losh.py +++ b/esda/losh.py @@ -1,9 +1,9 @@ -import numpy as np import warnings -from scipy import sparse + +import libpysal as lp +import numpy as np from scipy import stats from sklearn.base import BaseEstimator -import libpysal as lp class LOSH(BaseEstimator): @@ -88,17 +88,21 @@ def fit(self, y, a=2): if self.inference is None: return self - elif self.inference == 'chi-square': + elif self.inference == "chi-square": if a != 2: - warnings.warn(f'Chi-square inference assumes that a=2, but \ - a={a}. This means the inference will be invalid!') + warnings.warn( + f"Chi-square inference assumes that a=2, but \ + a={a}. This means the inference will be invalid!" + ) else: - dof = 2/self.VarHi - Zi = (2*self.Hi)/self.VarHi + dof = 2 / self.VarHi + Zi = (2 * self.Hi) / self.VarHi self.pval = 1 - stats.chi2.cdf(Zi, dof) else: - raise NotImplementedError(f'The requested inference method \ - ({self.inference}) is not currently supported!') + raise NotImplementedError( + f"The requested inference method \ + ({self.inference}) is not currently supported!" + ) return self @@ -113,22 +117,23 @@ def _statistic(y, w, a): rowsum = np.array(w.sparse.sum(axis=1)).flatten() # Calculate spatial mean - ylag = lp.weights.lag_spatial(w, y)/rowsum + ylag = lp.weights.lag_spatial(w, y) / rowsum # Calculate and adjust residuals based on multiplier - yresid = abs(y-ylag)**a + yresid = abs(y - ylag) ** a # Calculate denominator of Hi equation denom = np.mean(yresid) * np.array(rowsum) # Carry out final Hi calculation Hi = lp.weights.lag_spatial(w, yresid) / denom # Calculate average of residuals yresid_mean = np.mean(yresid) + # Calculate VarHi n = len(y) squared_rowsum = np.asarray(w.sparse.multiply(w.sparse).sum(axis=1)).flatten() - - VarHi = ((n-1)**-1) * \ - (denom**-2) * \ - ((np.sum(yresid**2)/n) - yresid_mean**2) * \ - ((n*squared_rowsum) - (rowsum**2)) + term1 = ((n - 1) ** -1) + term2 = (denom**-2) + term3 = ((np.sum(yresid**2) / n) - yresid_mean**2) + term4 = ((n * squared_rowsum) - (rowsum**2)) + VarHi = term1 * term2 * term3 * term4 return (Hi, ylag, yresid, VarHi) diff --git a/esda/map_comparison.py b/esda/map_comparison.py index d4f89e1c..6cd3338c 100644 --- a/esda/map_comparison.py +++ b/esda/map_comparison.py @@ -1,4 +1,5 @@ -import numpy, pandas +import numpy +import pandas from scipy.special import entr try: @@ -27,7 +28,8 @@ def _cast(collection): Cast a collection to a pygeos geometry array. """ try: - import pygeos, geopandas + import geopandas + import pygeos except (ImportError, ModuleNotFoundError) as exception: raise type(exception)( "pygeos and geopandas are required for map comparison statistics." 
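Note on the losh.py hunk above: it replaces the single backslash-continued VarHi expression with four named factors, term1 through term4. A minimal, self-contained sketch of that equivalence follows; the toy arrays are illustrative only and are not taken from the patch or from any esda test data.

    import numpy as np

    # Illustrative inputs only (hypothetical, 4 observations).
    n = 4
    rowsum = np.array([2.0, 3.0, 3.0, 2.0])           # row sums of W
    squared_rowsum = np.array([2.0, 3.0, 3.0, 2.0])   # row sums of W multiplied by W
    yresid = np.array([0.5, 1.0, 0.25, 2.0])          # abs(y - ylag) ** a
    yresid_mean = np.mean(yresid)
    denom = yresid_mean * rowsum

    # Pre-patch single-expression form ...
    VarHi_single = (
        ((n - 1) ** -1)
        * (denom ** -2)
        * ((np.sum(yresid ** 2) / n) - yresid_mean ** 2)
        * ((n * squared_rowsum) - (rowsum ** 2))
    )

    # ... and the term-by-term form introduced in the hunk above.
    term1 = (n - 1) ** -1
    term2 = denom ** -2
    term3 = (np.sum(yresid ** 2) / n) - yresid_mean ** 2
    term4 = (n * squared_rowsum) - (rowsum ** 2)
    VarHi_split = term1 * term2 * term3 * term4

    assert np.allclose(VarHi_single, VarHi_split)

Splitting the product keeps each factor within the flake8 line-length limit while leaving the computed variance unchanged, which the final assertion confirms for the toy inputs.
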
diff --git a/esda/mixture_smoothing.py b/esda/mixture_smoothing.py index f9da6aff..66a86599 100644 --- a/esda/mixture_smoothing.py +++ b/esda/mixture_smoothing.py @@ -6,12 +6,18 @@ in CAMAN R package that is originally written by Peter Schlattmann. """ -__author__ = "Myunghwa Hwang , Luc Anselin , Serge Rey , " + "Luc Anselin , " + "Serge Rey >> mixture = NP_Mixture_Smoother(e,b) - extracting the smoothed rates through the property r of the NP_Mixture_Smoother instance + extracting the smoothed rates through the property + r of the NP_Mixture_Smoother instance >>> mixture.r array([0.10982278, 0.03445531, 0.11018404, 0.11018604]) @@ -111,20 +118,20 @@ class NP_Mixture_Smoother(object): >>> mixture.getRateEstimates() (array([0.0911574, 0.0911574, 0.0911574, 0.0911574]), array([1, 1, 1, 1])) - """ + """ # noqa E501 - def __init__(self, e, b, k=50, acc=1.E-7, numiter=5000, limit=0.01): + def __init__(self, e, b, k=50, acc=1.0e-7, numiter=5000, limit=0.01): self.e = np.asarray(e).flatten() self.b = np.asarray(b).flatten() self.n = len(e) - self.w = 1. / self.n + self.w = 1.0 / self.n self.k = k self.acc = acc self.numiter = numiter self.limit = limit r = self.mixalg() - self.p = r['p'] - self.t = r['grid'] + self.p = r["p"] + self.t = r["grid"] self.r, self.category = self.getRateEstimates() def getSeed(self): @@ -133,7 +140,7 @@ def getSeed(self): r_diff = r_max - r_min step = r_diff / (self.k - 1) grid = np.arange(r_min, r_max + step, step) - p = np.ones(self.k) * 1. / self.k + p = np.ones(self.k) * 1.0 / self.k return p, grid def getMixedProb(self, grid): @@ -146,11 +153,11 @@ def getMixedProb(self, grid): def getGradient(self, mix, p): mix_p = mix * p mix_den = mix_p.sum(axis=1) - obs_id = mix_den > 1.E-13 + obs_id = mix_den > 1.0e-13 for i in range(self.k): mix_den_len = len(mix_den) - if (mix_den > 1.E-13).sum() == mix_den_len: - mix_p[:, i] = (1. / mix_den_len) * mix[:, i] / mix_den + if (mix_den > 1.0e-13).sum() == mix_den_len: + mix_p[:, i] = (1.0 / mix_den_len) * mix[:, i] / mix_den gradient = [] for i in range(self.k): gradient.append(mix_p[:, i][obs_id].sum()) @@ -164,39 +171,44 @@ def getMaxGradient(self, gradient): return (grad_max, grad_max_inx) def getMinGradient(self, gradient, p): - p_fil = p > 1.E-8 + p_fil = p > 1.0e-8 grad_fil = gradient[p_fil] grad_min = grad_fil.min() grad_min_inx = np.where(p_fil)[0][grad_fil.argmin()] - if grad_min >= 1.E+7: - return (1.E+7, 1) + if grad_min >= 1.0e7: + return (1.0e7, 1) return (grad_min, grad_min_inx) def getStepsize(self, mix_den, ht): - mix_den_fil = np.fabs(mix_den) > 1.E-7 + ############################################################################## + # Something seems off in this function + # - a & b are defined twice + # - 4 defined, but unused, variables (commented out) + ############################################################################## + mix_den_fil = np.fabs(mix_den) > 1.0e-7 a = ht[mix_den_fil] / mix_den[mix_den_fil] b = 1.0 + a - b_fil = np.fabs(b) > 1.E-7 w = self.w - sl = w * ht[b_fil] / b[b_fil] - s11 = sl.sum() - s0 = (w * ht).sum() + # b_fil = np.fabs(b) > 1.0e-7 + # sl = w * ht[b_fil] / b[b_fil] + # s11 = sl.sum() + # s0 = (w * ht).sum() - step, oldstep = 0., 0. + step, oldstep = 0.0, 0.0 for i in range(50): - grad1, grad2 = 0., 0. 
+ grad1, grad2 = 0.0, 0.0 for j in range(self.n): a = mix_den[j] + step * ht[j] - if math.fabs(a) > 1.E-7: + if math.fabs(a) > 1.0e-7: b = ht[j] / a grad1 = grad1 + w * b grad2 = grad2 - w * b * b - if math.fabs(grad2) > 1.E-10: + if math.fabs(grad2) > 1.0e-10: step = step - grad1 / grad2 if oldstep > 1.0 and step > oldstep: - step = 1. + step = 1.0 break - if grad1 < 1.E-7: + if grad1 < 1.0e-7: break oldstep = step if step > 1.0: @@ -209,36 +221,49 @@ def vem(self, mix, p, grid): grad, mix_den = self.getGradient(mix, p) grad_max, grad_max_inx = self.getMaxGradient(grad) grad_min, grad_min_inx = self.getMinGradient(grad, p) - ht = (mix[:, grad_max_inx] - mix[:, grad_min_inx] - ) * p[grad_min_inx] + ht = (mix[:, grad_max_inx] - mix[:, grad_min_inx]) * p[grad_min_inx] st = self.getStepsize(mix_den, ht) xs = st * p[grad_min_inx] p[grad_min_inx] = p[grad_min_inx] - xs p[grad_max_inx] = p[grad_max_inx] + xs if (grad_max - 1.0) < self.acc or it == (self.numiter - 1): - res = {'k': self.k, 'accuracy': grad_max - 1.0, 'p': p, 'grid': grid, 'gradient': grad, 'mix_den': mix_den} + res = { + "k": self.k, + "accuracy": grad_max - 1.0, + "p": p, + "grid": grid, + "gradient": grad, + "mix_den": mix_den, + } break return res def update(self, p, grid): - p_inx = p > 1.E-3 + p_inx = p > 1.0e-3 new_p = p[p_inx] new_grid = grid[p_inx] self.k = len(new_p) return new_p, new_grid def em(self, nstep, grid, p): - l = self.k - 1 + l = self.k - 1 # noqa E741 w, n, e, b = self.w, self.n, self.e, self.b if self.k == 1: s11 = (w * b / np.ones(n)).sum() s12 = (w * e / np.ones(n)).sum() grid[l] = s11 / s12 - p[l] = 1. + p[l] = 1.0 mix = self.getMixedProb(grid) grad, mix_den = self.getGradient(mix, p) grad_max, grad_max_inx = self.getMaxGradient(grad) - return {'accuracy': math.fabs(grad_max - 1), 'k': self.k, 'p': p, 'grid': grid, 'gradient': grad, 'mix_den': mix_den} + return { + "accuracy": math.fabs(grad_max - 1), + "k": self.k, + "p": p, + "grid": grid, + "gradient": grad, + "mix_den": mix_den, + } else: res = {} for counter in range(nstep): @@ -246,17 +271,28 @@ def em(self, nstep, grid, p): grad, mix_den = self.getGradient(mix, p) p = p * grad su = p[:-1].sum() - p[l] = 1. 
- su + p[l] = 1.0 - su for j in range(self.k): - mix_den_fil = mix_den > 1.E-10 + mix_den_fil = mix_den > 1.0e-10 f_len = len(mix_den_fil) - s11 = (w * e[mix_den_fil] / np.ones(f_len) * mix[mix_den_fil, j] / mix_den[mix_den_fil]).sum() - s12 = (w * b[mix_den_fil] * (mix[mix_den_fil, j] / np.ones(f_len)) / mix_den[mix_den_fil]).sum() - if s12 > 1.E-12: + ones = np.ones(f_len) + mdf_j = mix[mix_den_fil, j] + _mdf = mix_den[mix_den_fil] + s11 = (w * e[mix_den_fil] / ones * mdf_j / _mdf).sum() + s12 = (w * b[mix_den_fil] / ones * mdf_j / _mdf).sum() + if s12 > 1.0e-12: grid[j] = s11 / s12 grad_max, grad_max_inx = self.getMaxGradient(grad) - res = {'accuracy': math.fabs(grad_max - 1.), 'step': counter + 1, 'k': self.k, 'p': p, 'grid': grid, 'gradient': grad, 'mix_den': mix_den} - if res['accuracy'] < self.acc and counter > 10: + res = { + "accuracy": math.fabs(grad_max - 1.0), + "step": counter + 1, + "k": self.k, + "p": p, + "grid": grid, + "gradient": grad, + "mix_den": mix_den, + } + if res["accuracy"] < self.acc and counter > 10: break return res @@ -266,7 +302,7 @@ def getLikelihood(self, mix_den): return r def combine(self, res): - p, grid, k = res['p'], res['grid'], self.k + p, grid, k = res["p"], res["grid"], self.k diff = np.fabs(grid[:-1] - grid[1:]) bp_seeds = (diff >= self.limit).nonzero()[0] + 1 if k - len(bp_seeds) > 1: @@ -279,26 +315,31 @@ def combine(self, res): bp.append(bp_seeds[0]) for i in range(1, len(bp_seeds)): if bp_seeds[i] - bp_seeds[i - 1] > 1: - bp.append(a[i]) + ############################################################## + # NEEDS ATTENTION + # `a` is not defined anywhere... what is it? + # -- seems like the condition is never met + # (bp_seeds[i] - bp_seeds[i - 1] > 1) + bp.append(a[i]) # noqa F821 + ############################################################## new_grid, new_p = [], [] for i in range(len(bp) - 1): new_grid.append(grid[bp[i]]) - new_p.append(p[bp[i]:bp[i + 1]].sum()) - self.k = new_k = len(new_p) + new_p.append(p[bp[i] : bp[i + 1]].sum()) # noqa E203 + self.k = len(new_p) new_grid, new_p = np.array(new_grid), np.array(new_p) mix = self.getMixedProb(new_grid) grad, mix_den = self.getGradient(mix, new_p) res = self.em(1, new_grid, new_p) if res is not None: - res['likelihood'] = self.getLikelihood(mix_den) + res["likelihood"] = self.getLikelihood(mix_den) return res def mixalg(self): - e, b, k, n = self.e, self.b, self.k, self.n p, grid = self.getSeed() mix = self.getMixedProb(grid) vem_res = self.vem(mix, p, grid) - p, grid, k = vem_res['p'], vem_res['grid'], vem_res['k'] + p, grid = vem_res["p"], vem_res["grid"] n_p, n_g = self.update(p, grid) em_res = self.em(self.numiter, n_g, n_p) com_res = self.combine(em_res) diff --git a/esda/moran.py b/esda/moran.py index 5bc85316..6308664a 100644 --- a/esda/moran.py +++ b/esda/moran.py @@ -2,23 +2,24 @@ Moran's I Spatial Autocorrelation Statistics """ -__author__ = "Sergio J. Rey , \ - Dani Arribas-Bel , \ - Levi John Wolf " -from libpysal.weights.spatial_lag import lag_spatial as slag -from .smoothing import assuncao_rate -from .tabular import _univariate_handler, _bivariate_handler -from .crand import ( - crand as _crand_plus, - njit as _njit, - _prepare_univariate, - _prepare_bivariate, +__author__ = ( + "Sergio J. 
Rey , " + "Dani Arribas-Bel , " + "Levi John Wolf " ) -from warnings import warn, simplefilter -from scipy import sparse -import scipy.stats as stats + +from warnings import simplefilter + import numpy as np -import tempfile +import scipy.stats as stats +from libpysal.weights.spatial_lag import lag_spatial as slag +from scipy import sparse + +from .crand import _prepare_univariate +from .crand import crand as _crand_plus +from .crand import njit as _njit +from .smoothing import assuncao_rate +from .tabular import _bivariate_handler, _univariate_handler __all__ = [ "Moran", @@ -162,7 +163,7 @@ def __init__( self.w = w self.permutations = permutations self.__moments() - self.I = self.__calc(self.z) + self.I = self.__calc(self.z) # noqa E741 self.z_norm = (self.I - self.EI) / self.seI_norm self.z_rand = (self.I - self.EI) / self.seI_rand @@ -189,7 +190,7 @@ def __init__( self.p_sim = (larger + 1.0) / (permutations + 1.0) self.EI_sim = sim.sum() / permutations self.seI_sim = np.array(sim).std() - self.VI_sim = self.seI_sim ** 2 + self.VI_sim = self.seI_sim**2 self.z_sim = (self.I - self.EI_sim) / self.seI_sim if self.z_sim > 0: self.p_z_sim = 1 - stats.norm.cdf(self.z_sim) @@ -219,8 +220,8 @@ def __moments(self): self.seI_norm = self.VI_norm ** (1 / 2.0) # variance under randomization - xd4 = z ** 4 - xd2 = z ** 2 + xd4 = z**4 + xd2 = z**2 k_num = xd4.sum() / n k_den = (xd2.sum() / n) ** 2 k = k_num / k_den @@ -250,32 +251,32 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - cols : string or list of string - name or list of names of columns to use to compute the statistic - w : pysal weights object - a weights object aligned with the dataframe. If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, the derived columns will be named - 'column_moran' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the Moran statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - Moran statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the Moran statistic. + df : pandas.DataFrame + a pandas dataframe with a geometry column + cols : string or list of string + name or list of names of columns to use to compute the statistic + w : pysal weights object + a weights object aligned with the dataframe. If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe inplace or to + return a series contaning the results of the computation. If + operating inplace, the derived columns will be named 'column_moran' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the Moran statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the + Moran statistic + **stat_kws : dict + options to pass to the underlying statistic. For this, see the + documentation for the Moran statistic. Returns -------- - If inplace, None, and operation is conducted on dataframe in memory. Otherwise, - returns a copy of the dataframe with the relevant columns attached. 
+ If inplace, None, and operation is conducted on dataframe + in memory. Otherwise, returns a copy of the dataframe with + the relevant columns attached. """ return _univariate_handler( @@ -411,7 +412,7 @@ def __init__(self, x, y, w, transformation="r", permutations=PERMUTATIONS): self.den = n - 1.0 # zx'zx = zy'zy = n-1 w.transform = transformation self.w = w - self.I = self.__calc(zy) + self.I = self.__calc(zy) # noqa E741 if permutations: nrp = np.random.permutation sim = [self.__calc(nrp(zy)) for i in range(permutations)] @@ -423,7 +424,7 @@ def __init__(self, x, y, w, transformation="r", permutations=PERMUTATIONS): self.p_sim = (larger + 1.0) / (permutations + 1.0) self.EI_sim = sim.sum() / permutations self.seI_sim = np.array(sim).std() - self.VI_sim = self.seI_sim ** 2 + self.VI_sim = self.seI_sim**2 self.z_sim = (self.I - self.EI_sim) / self.seI_sim if self.z_sim > 0: self.p_z_sim = 1 - stats.norm.cdf(self.z_sim) @@ -457,39 +458,39 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - X : list of strings - column name or list of column names to use as X values to compute - the bivariate statistic. If no Y is provided, pairwise comparisons - among these variates are used instead. - Y : list of strings - column name or list of column names to use as Y values to compute - the bivariate statistic. if no Y is provided, pariwise comparisons - among the X variates are used instead. - w : pysal weights object - a weights object aligned with the dataframe. If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, the derived columns will be named - 'column_moran_local' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the Moran_BV statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - Moran_BV statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the Moran_BV statistic. - + df : pandas.DataFrame + a pandas dataframe with a geometry column + X : list of strings + column name or list of column names to use as X values to compute + the bivariate statistic. If no Y is provided, pairwise comparisons + among these variates are used instead. + Y : list of strings + column name or list of column names to use as Y values to compute + the bivariate statistic. if no Y is provided, pariwise comparisons + among the X variates are used instead. + w : pysal weights object + a weights object aligned with the dataframe. If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe inplace or to + return a series contaning the results of the computation. If + operating inplace, the derived columns will be named + 'column_moran_local' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the Moran_BV statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the + Moran_BV statistic + **stat_kws : keyword arguments + options to pass to the underlying statistic. For this, see the + documentation for the Moran_BV statistic. 
Returns -------- - If inplace, None, and operation is conducted on dataframe in memory. Otherwise, - returns a copy of the dataframe with the relevant columns attached. + If inplace, None, and operation is conducted on dataframe + in memory. Otherwise, returns a copy of the dataframe with + the relevant columns attached. """ return _bivariate_handler( @@ -587,10 +588,11 @@ def _Moran_BV_Matrix_array(variables, w, permutations=0, varnames=None): """ Base calculation for MORAN_BV_Matrix """ + + k = len(variables) if varnames is None: varnames = ["x{}".format(i) for i in range(k)] - k = len(variables) rk = list(range(0, k - 1)) results = {} for i in rk: @@ -753,38 +755,39 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - events : string or list of strings - one or more names where events are stored - populations : string or list of strings - one or more names where the populations corresponding to the - events are stored. If one population column is provided, it is - used for all event columns. If more than one population column - is provided but there is not a population for every event - column, an exception will be raised. - w : pysal weights object - a weights object aligned with the dataframe. If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, the derived columns will be named - 'column_moran_rate' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the Moran_Rate statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - Moran_Rate statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the Moran_Rate statistic. + df : pandas.DataFrame + a pandas dataframe with a geometry column + events : string or list of strings + one or more names where events are stored + populations : string or list of strings + one or more names where the populations corresponding to the + events are stored. If one population column is provided, it is + used for all event columns. If more than one population column + is provided but there is not a population for every event + column, an exception will be raised. + w : pysal weights object + a weights object aligned with the dataframe. If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe inplace or to + return a series contaning the results of the computation. If + operating inplace, the derived columns will be named + 'column_moran_rate' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the Moran_Rate statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the + Moran_Rate statistic + **stat_kws : keyword arguments + options to pass to the underlying statistic. For this, see the + documentation for the Moran_Rate statistic. Returns -------- - If inplace, None, and operation is conducted on dataframe in memory. Otherwise, - returns a copy of the dataframe with the relevant columns attached. + If inplace, None, and operation is conducted on dataframe + in memory. 
Otherwise, returns a copy of the dataframe with + the relevant columns attached. """ if not inplace: @@ -884,11 +887,12 @@ class Moran_Local(object): seed : None/int Seed to ensure reproducibility of conditional randomizations. Must be set here, and not outside of the function, since numba - does not correctly interpret external seeds nor numpy.random.RandomState instances. + does not correctly interpret external seeds nor + numpy.random.RandomState instances. island_weight: - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. If 0, then - the lag is always zero for islands. + value to use as a weight for the "fake" neighbor for every island. + If numpy.nan, will propagate to the final local statistic depending + on the `stat_func`. If 0, then the lag is always zero for islands. Attributes ---------- @@ -1076,12 +1080,9 @@ def __quads(self): np = (1 - zp) * lp nn = (1 - zp) * (1 - lp) pn = zp * (1 - lp) - self.q = ( - self.quads[0] * pp - + self.quads[1] * np - + self.quads[2] * nn - + self.quads[3] * pn - ) + + q0, q1, q2, q3 = self.quads + self.q = (q0 * pp) + (q1 * np) + (q2 * nn) + (q3 * pn) def __moments(self): W = self.w.sparse @@ -1091,40 +1092,43 @@ def __moments(self): m2 = (z * z).sum() / n wi = np.asarray(W.sum(axis=1)).flatten() wi2 = np.asarray(W.multiply(W).sum(axis=1)).flatten() + # --------------------------------------------------------- # Conditional randomization null, Sokal 1998, Eqs. A7 & A8 # assume that division is as written, so that # a - b / (n - 1) means a - (b / (n-1)) # --------------------------------------------------------- - expectation = -(z ** 2 * wi) / ((n - 1) * m2) - variance = ( - (z / m2) ** 2 - * (n / (n - 2)) - * (wi2 - (wi ** 2 / (n - 1))) - * (m2 - (z ** 2 / (n - 1))) - ) + expectation = -(z**2 * wi) / ((n - 1) * m2) + var_term1 = (z / m2) ** 2 + var_term2 = (n / (n - 2)) + var_term3 = (wi2 - (wi**2 / (n - 1))) + var_term4 = (m2 - (z**2 / (n - 1))) + variance = var_term1 * var_term2 * var_term3 * var_term4 self.EIc = expectation self.VIc = variance + # --------------------------------------------------------- # Total randomization null, Sokal 1998, Eqs. A3 & A4* # --------------------------------------------------------- - m4 = z ** 4 / n - b2 = m4 / m2 ** 2 + m4 = z**4 / n + b2 = m4 / m2**2 expectation = -wi / (n - 1) + # assume that "avoiding identical subscripts" in :cite:`Anselin1995` - # includes i==h and i==k, we can use the form due to :cite:`sokal1998local` below. + # includes i==h and i==k, we can use the form due to + # :cite:`sokal1998local` below. + # wikh = _wikh_fast(W) # variance_anselin = (wi2 * (n - b2)/(n-1) # + 2*wikh*(2*b2 - n) / ((n-1)*(n-2)) # - wi**2/(n-1)**2) self.EI = expectation - self.VI = ( - wi2 * (n - b2) / (n - 1) - + (wi ** 2 - wi2) * (2 * b2 - n) / ((n - 1) * (n - 2)) - - (-wi / (n - 1)) ** 2 - ) + n1 = n - 1 + self.VI = wi2 * (n - b2) / n1 + self.VI += (wi**2 - wi2) * (2 * b2 - n) / (n1 * (n - 2)) + self.VI -= (-wi / n1) ** 2 @property def _statistic(self): @@ -1140,32 +1144,33 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - cols : string or list of string - name or list of names of columns to use to compute the statistic - w : pysal weights object - a weights object aligned with the dataframe. 
If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, the derived columns will be named - 'column_moran_local' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the Moran_Local statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - Moran_Local statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the Moran_Local statistic. + df : pandas.DataFrame + a pandas dataframe with a geometry column + cols : string or list of string + name or list of names of columns to use to compute the statistic + w : pysal weights object + a weights object aligned with the dataframe. If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe inplace or to + return a series contaning the results of the computation. If + operating inplace, the derived columns will be named + 'column_moran_local' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the Moran_Local statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the + Moran_Local statistic + **stat_kws : dict + options to pass to the underlying statistic. For this, see the + documentation for the Moran_Local statistic. Returns -------- - If inplace, None, and operation is conducted on dataframe in memory. Otherwise, - returns a copy of the dataframe with the relevant columns attached. + If inplace, None, and operation is conducted on dataframe + in memory. Otherwise, returns a copy of the dataframe with + the relevant columns attached. """ return _univariate_handler( @@ -1217,11 +1222,12 @@ class Moran_Local_BV(object): seed : None/int Seed to ensure reproducibility of conditional randomizations. Must be set here, and not outside of the function, since numba - does not correctly interpret external seeds nor numpy.random.RandomState instances. + does not correctly interpret external seeds nor + numpy.random.RandomState instances. island_weight: - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. If 0, then - the lag is always zero for islands. + value to use as a weight for the "fake" neighbor for every island. + If numpy.nan, will propagate to the final local statistic depending + on the `stat_func`. If 0, then the lag is always zero for islands. Attributes ---------- @@ -1373,12 +1379,9 @@ def __quads(self): np = (1 - zp) * lp nn = (1 - zp) * (1 - lp) pn = zp * (1 - lp) - self.q = ( - self.quads[0] * pp - + self.quads[1] * np - + self.quads[2] * nn - + self.quads[3] * pn - ) + + q0, q1, q2, q3 = self.quads + self.q = (q0 * pp) + (q1 * np) + (q2 * nn) + (q3 * pn) @property def _statistic(self): @@ -1402,39 +1405,39 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - X : list of strings - column name or list of column names to use as X values to compute - the bivariate statistic. If no Y is provided, pairwise comparisons - among these variates are used instead. 
- Y : list of strings - column name or list of column names to use as Y values to compute - the bivariate statistic. if no Y is provided, pariwise comparisons - among the X variates are used instead. - w : pysal weights object - a weights object aligned with the dataframe. If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, the derived columns will be named - 'column_moran_local_bv' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the Moran_Local_BV statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - Moran_Local_BV statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the Moran_Local_BV statistic. - + df : pandas.DataFrame + a pandas dataframe with a geometry column + X : list of strings + column name or list of column names to use as X values to compute + the bivariate statistic. If no Y is provided, pairwise comparisons + among these variates are used instead. + Y : list of strings + column name or list of column names to use as Y values to compute + the bivariate statistic. if no Y is provided, pariwise comparisons + among the X variates are used instead. + w : pysal weights object + a weights object aligned with the dataframe. If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe inplace or to + return a series contaning the results of the computation. If + operating inplace, the derived columns will be named + 'column_moran_local_bv' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the Moran_Local_BV statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the + Moran_Local_BV statistic + **stat_kws : dict + options to pass to the underlying statistic. For this, see the + documentation for the Moran_Local_BV statistic. Returns -------- - If inplace, None, and operation is conducted on dataframe in memory. Otherwise, - returns a copy of the dataframe with the relevant columns attached. + If inplace, None, and operation is conducted on dataframe + in memory. Otherwise, returns a copy of the dataframe with + the relevant columns attached. """ return _bivariate_handler( @@ -1492,9 +1495,9 @@ class Moran_Local_Rate(Moran_Local): Must be set here, and not outside of the function, since numba does not correctly interpret external seeds nor numpy.random.RandomState instances. island_weight: float - value to use as a weight for the "fake" neighbor for every island. If numpy.nan, - will propagate to the final local statistic depending on the `stat_func`. If 0, then - the lag is always zero for islands. + value to use as a weight for the "fake" neighbor for every island. + If numpy.nan, will propagate to the final local statistic depending + on the `stat_func`. If 0, then the lag is always zero for islands. 
Attributes ---------- @@ -1550,16 +1553,19 @@ class Moran_Local_Rate(Moran_Local): >>> e = np.array(f.by_col('SID79')) >>> b = np.array(f.by_col('BIR79')) >>> from esda.moran import Moran_Local_Rate - >>> lm = Moran_Local_Rate(e, b, w, transformation = "r", permutations = 99) + >>> lm = Moran_Local_Rate(e, b, w, transformation="r", permutations=99) >>> lm.q[:10] array([2, 4, 3, 1, 2, 1, 1, 4, 2, 4]) - >>> lm = Moran_Local_Rate(e, b, w, transformation = "r", permutations = 99, geoda_quads=True) + >>> lm = Moran_Local_Rate( + ... e, b, w, transformation = "r", permutations=99, geoda_quads=True + ) >>> lm.q[:10] array([3, 4, 2, 1, 3, 1, 1, 4, 3, 4]) Note random components result is slightly different values across architectures so the results have been removed from doctests and will be moved into unittests that are conditional on architectures + """ def __init__( @@ -1612,37 +1618,39 @@ def by_col( Parameters ---------- - df : pandas.DataFrame - a pandas dataframe with a geometry column - events : string or list of strings - one or more names where events are stored - populations : string or list of strings - one or more names where the populations corresponding to the - events are stored. If one population column is provided, it is - used for all event columns. If more than one population column - is provided but there is not a population for every event - column, an exception will be raised. - w : pysal weights object - a weights object aligned with the dataframe. If not provided, this - is searched for in the dataframe's metadata - inplace : bool - a boolean denoting whether to operate on the dataframe inplace or to - return a series contaning the results of the computation. If - operating inplace, the derived columns will be named 'column_moran_local_rate' - pvalue : string - a string denoting which pvalue should be returned. Refer to the - the Moran_Local_Rate statistic's documentation for available p-values - outvals : list of strings - list of arbitrary attributes to return as columns from the - Moran_Local_Rate statistic - **stat_kws : keyword arguments - options to pass to the underlying statistic. For this, see the - documentation for the Moran_Local_Rate statistic. + df : pandas.DataFrame + a pandas dataframe with a geometry column + events : string or list of strings + one or more names where events are stored + populations : string or list of strings + one or more names where the populations corresponding to the + events are stored. If one population column is provided, it is + used for all event columns. If more than one population column + is provided but there is not a population for every event + column, an exception will be raised. + w : pysal weights object + a weights object aligned with the dataframe. If not provided, this + is searched for in the dataframe's metadata + inplace : bool + a boolean denoting whether to operate on the dataframe + inplace or to return a series contaning the results of + the computation. If operating inplace, the derived columns + will be named 'column_moran_local_rate' + pvalue : string + a string denoting which pvalue should be returned. Refer to the + the Moran_Local_Rate statistic's documentation for available p-values + outvals : list of strings + list of arbitrary attributes to return as columns from the + Moran_Local_Rate statistic + **stat_kws : dict + options to pass to the underlying statistic. For this, see the + documentation for the Moran_Local_Rate statistic. 
Returns -------- - If inplace, None, and operation is conducted on dataframe in memory. Otherwise, - returns a copy of the dataframe with the relevant columns attached. + If inplace, None, and operation is conducted on dataframe + in memory. Otherwise, returns a copy of the dataframe with + the relevant columns attached. """ if not inplace: @@ -1711,7 +1719,9 @@ def _wikh_fast(W, sokal_correction=False): """ This computes the outer product of weights for each observation. - w_{i(kh)} = \sum_{k \neq i}^n \sum_{h \neq i}^n w_ik * w_hk + .. math:: + + w_{i(kh)} = \\sum_{k \neq i}^n \\sum_{h \neq i}^n w_ik * w_hk If the :cite:`sokal1998local` version is used, then we also have h \neq k Since this version introduces a simplification in the expression @@ -1720,14 +1730,14 @@ def _wikh_fast(W, sokal_correction=False): Arguments --------- - W : scipy sparse matrix - a sparse matrix describing the spatial relationships - between observations. - sokal_correction: bool - Whether to avoid self-neighbors in the summation of weights. - If False (default), then the outer product of all weights - for observation i are used, regardless if they are of the form - w_hh or w_kk. + W : scipy sparse matrix + a sparse matrix describing the spatial relationships + between observations. + sokal_correction : bool + Whether to avoid self-neighbors in the summation of weights. + If False (default), then the outer product of all weights + for observation i are used, regardless if they are of the form + w_hh or w_kk. Returns ------- diff --git a/esda/shape.py b/esda/shape.py index 3f103a77..c45ee5b7 100644 --- a/esda/shape.py +++ b/esda/shape.py @@ -15,7 +15,8 @@ def _cast(collection): Cast a collection to a pygeos geometry array. """ try: - import pygeos, geopandas + import geopandas + import pygeos except (ImportError, ModuleNotFoundError) as exception: raise type(exception)("pygeos and geopandas are required for shape statistics.") @@ -66,7 +67,6 @@ def get_angles(collection, return_indices=False): exploded = pygeos.get_parts(ga) coords = pygeos.get_coordinates(exploded) n_coords_per_geom = pygeos.get_num_coordinates(exploded) - n_parts_per_geom = pygeos.get_num_geometries(exploded) angles = numpy.asarray(_get_angles(coords, n_coords_per_geom)) if return_indices: return angles, numpy.repeat( @@ -85,11 +85,9 @@ def _get_angles(points, n_coords_per_geom): """ # Start at the first point of the first geometry offset = int(0) - start = points[0] on_geom = 0 on_coord = 0 result = [] - n_points = len(points) while True: # if we're on the last point before the closure point, if on_coord == (n_coords_per_geom[on_geom] - 1): @@ -125,7 +123,7 @@ def _get_angles(points, n_coords_per_geom): def isoperimetric_quotient(collection): - """ + r""" The Isoperimetric quotient, defined as the ratio of a polygon's area to the area of the equi-perimeter circle. @@ -206,7 +204,7 @@ def diameter_ratio(collection, rotated=True): if rotated: box = pygeos.minimum_rotated_rectangle(ga) coords = pygeos.get_coordinates(box) - a, b, c, d = (coords[0::5], coords[1::5], coords[2::5], coords[3::5]) + a, b, _, d = (coords[0::5], coords[1::5], coords[2::5], coords[3::5]) widths = numpy.sqrt(numpy.sum((a - b) ** 2, axis=1)) heights = numpy.sqrt(numpy.sum((a - d) ** 2, axis=1)) else: @@ -264,9 +262,12 @@ def convex_hull_ratio(collection): def fractal_dimension(collection, support="hex"): """ - The fractal dimension of the boundary of a shape, assuming a given spatial support for the geometries. 
+ The fractal dimension of the boundary of a shape, assuming a given + spatial support for the geometries. - Note that this derivation assumes a specific ideal spatial support for the polygon, and is thus may not return valid results for complex or highly irregular geometries. + Note that this derivation assumes a specific ideal spatial support + for the polygon, and is thus may not return valid results for + complex or highly irregular geometries. """ ga = _cast(collection) P = pygeos.measurement.length(ga) @@ -279,7 +280,8 @@ def fractal_dimension(collection, support="hex"): return 2 * numpy.log(P / (2 * numpy.pi)) / numpy.log(A / numpy.pi) else: raise ValueError( - f"The support argument must be one of 'hex', 'circle', or 'square', but {support} was provided." + "The support argument must be one of 'hex', 'circle', or 'square', " + f"but {support} was provided." ) @@ -315,14 +317,16 @@ def squareness(collection): def rectangularity(collection): """ - Ratio of the area of the shape to the area of its minimum bounding rotated rectangle + Ratio of the area of the shape to the area + of its minimum bounding rotated rectangle Reveals a polygon’s degree of being curved inward. .. math:: \\frac{A}{A_{MBR}} - where :math:`A` is the area and :math:`A_{MBR}` is the area of minimum bounding + where :math:`A` is the area and :math:`A_{MBR}` + is the area of minimum bounding rotated rectangle. Notes @@ -414,7 +418,8 @@ def moment_of_inertia(collection): Thus, for constant unit mass at each boundary point, the MoI of this pointcloud is - \sum_i d_{i,c}^2 + .. math:: + \\sum_i d_{i,c}^2 where c is the centroid of the polygon @@ -441,7 +446,7 @@ def moa_ratio(collection): """ ga = _cast(collection) r = pygeos.measurement.length(ga) / (2 * numpy.pi) - return (numpy.pi * 0.5 * r ** 4) / second_areal_moment(ga) + return (numpy.pi * 0.5 * r**4) / second_areal_moment(ga) def nmi(collection): @@ -460,8 +465,9 @@ def second_areal_moment(collection): moment of area is actually the cross-moment of area between the X and Y dimensions: - I_xy = (1/24)\sum^{i=N}^{i=1} (x_iy_{i+1} + 2*x_iy_i + 2*x_{i+1}y_{i+1} + - x_{i+1}y_i)(x_iy_i - x_{i+1}y_i) + .. math:: + I_xy = (1/24)\\sum^{i=N}^{i=1} (x_iy_{i+1} + 2*x_iy_i + 2*x_{i+1}y_{i+1} + + x_{i+1}y_i)(x_iy_i - x_{i+1}y_i) where x_i, y_i is the current point and x_{i+1}, y_{i+1} is the next point, and where x_{n+1} = x_1, y_{n+1} = 1. @@ -483,7 +489,6 @@ def second_areal_moment(collection): for hole_ix in range(n_holes): hole = pygeos.get_coordinates(pygeos.get_interior_ring(ga, hole_ix)) result[i] -= _second_moa_ring(hole) - n_parts = pygeos.get_num_geometries(geometry) for part in pygeos.get_parts(geometry): result[i] += _second_moa_ring(pygeos.get_coordinates(part)) # must divide everything by 24 and flip if polygon is clockwise. 
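The next hunk regroups the accumulator inside _second_moa_ring into the intermediate products xtyh, xhyt, xtyt, and xhyh. The plain-Python sketch below walks the same tail/head vertex pairs without the numba decoration; the function name and the unit-square ring are illustrative, not part of the patch, and the caller shown above still applies the final division by 24 and the orientation flip.

    import numpy as np

    def second_moa_ring_sketch(points):
        # Accumulate the cross second moment of area over one closed ring,
        # pairing each vertex (tail) with the next vertex (head).
        moi = 0.0
        for (x_tail, y_tail), (x_head, y_head) in zip(points[:-1], points[1:]):
            xtyh = x_tail * y_head
            xhyt = x_head * y_tail
            xtyt = x_tail * y_tail
            xhyh = x_head * y_head
            moi += (xtyh - xhyt) * (xtyh + 2 * xtyt + 2 * xhyh + xhyt)
        return moi

    # Closed, counter-clockwise unit-square ring (first vertex repeated at the end).
    square = np.array([(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)], dtype=float)
    print(second_moa_ring_sketch(square) / 24)  # 0.25 for the unit square
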
@@ -500,12 +505,11 @@ def _second_moa_ring(points): for i in prange(len(points[:-1])): x_tail, y_tail = points[i] x_head, y_head = points[i + 1] - moi += (x_tail * y_head - x_head * y_tail) * ( - x_tail * y_head - + 2 * x_tail * y_tail - + 2 * x_head * y_head - + x_head * y_tail - ) + xtyh = x_tail * y_head + xhyt = x_head * y_tail + xtyt = x_tail * y_tail + xhyh = x_head * y_head + moi += (xtyh - xhyt) * (xtyh + 2 * xtyt + 2 * xhyh + xhyt) return moi diff --git a/esda/silhouettes.py b/esda/silhouettes.py index 2aa45f50..d732c6e9 100644 --- a/esda/silhouettes.py +++ b/esda/silhouettes.py @@ -1,26 +1,28 @@ +import warnings + import numpy as np -from scipy.sparse import csgraph as cg from scipy import sparse as sp +from scipy.sparse import csgraph as cg try: + import pandas as pd import sklearn.metrics as sk import sklearn.metrics.pairwise as skp - from sklearn.preprocessing import LabelEncoder - import pandas as pd + from sklearn.preprocessing import LabelEncoder # noqa F401 HAS_REQUIREMENTS = True -except ImportError as e: +except ImportError: HAS_REQUIREMENTS = False def _raise_initial_error(): missing = [] try: - import sklearn + import sklearn # noqa F401 except ImportError: missing.append("scikit-learn") try: - import pandas + import pandas # noqa F401 except ImportError: missing.append("pandas") raise ImportError( @@ -50,7 +52,8 @@ def path_silhouette( directed=False, ): """ - Compute a path silhouette for all observations :cite:`wolf2019geosilhouettes,Rousseeuw1987`. + Compute a path silhouette for all observations + :cite:`wolf2019geosilhouettes,Rousseeuw1987`. Parameters @@ -250,7 +253,8 @@ def path_silhouette( def boundary_silhouette(data, labels, W, metric=skp.euclidean_distances): """ - Compute the observation-level boundary silhouette score :cite:`wolf2019geosilhouettes`. + Compute the observation-level boundary silhouette + score :cite:`wolf2019geosilhouettes`. Parameters ---------- @@ -280,8 +284,8 @@ def boundary_silhouette(data, labels, W, metric=skp.euclidean_distances): So, instead of considering *all* clusters when finding the next-best-fit cluster, only clusters that `i` borders are considered. This is supposed to model the fact that, in spatially-constrained clustering, - observation i can only be reassigned from cluster c to cluster k if some observation - j neighbors i and also resides in k. + observation i can only be reassigned from cluster c to cluster k if + some observation j neighbors i and also resides in k. If an observation only neighbors its own cluster, i.e. is not on the boundary of a cluster, this value is zero. @@ -383,7 +387,10 @@ def silhouette_alist(data, labels, alist, indices=None, metric=skp.euclidean_dis containing `focal` id, `neighbor` id, and `label_focal`, and `label_neighbor`, this computes: - `d(i,label_neighbor) - d(i,label_focal) / (max(d(i,label_neighbor), d(i,label_focal)))` + .. math:: + + d(i,label_neighbor) - d(i,label_focal) + / (max(d(i,label_neighbor), d(i,label_focal))) Parameters ---------- @@ -465,17 +472,17 @@ def silhouette_alist(data, labels, alist, indices=None, metric=skp.euclidean_dis neighbor_mask = np.nonzero(neighbor_mask.values)[0] if len(neighbor_mask) == 0: sils.append(0) - warn( - "A link ({},{}) has been found to have an empty set of neighbors. " - " This may happen when a label assignment is missing for the neighbor unit." - " Check that no labels are missing.".format(row.focal, row.neighbor) + warnings.warn( + f"A link ({row.focal},{row.neighbor}) has been found to have " + "an empty set of neighbors. 
This may happen when a label " + "assignment is missing for the neighbor unit. " + "Check that no labels are missing." ) continue outer_distance = full_distances[i_Xc, neighbor_mask].mean() - sils.append( - (outer_distance - within_cluster) - / np.maximum(outer_distance, within_cluster) - ) + dist_diff = outer_distance - within_cluster + dist_max = np.maximum(outer_distance, within_cluster) + sils.append(dist_diff / dist_max) result["silhouette"] = sils return result.sort_values("focal").reset_index(drop=True) @@ -492,22 +499,22 @@ def nearest_label( Parameters ---------- data : (N,P) array to cluster on or DataFrame indexed on the same values as - that in alist.focal/alist.neighbor - labels: (N,) array containing classifications, indexed on the same values - as that in alist.focal/alist.neighbor - metric : callable, array, - a function that takes an argument (data) and returns the all-pairs - distances/dissimilarity between observations. + that in alist.focal/alist.neighbor + labels : (N,) array containing classifications, indexed on the same values + as that in alist.focal/alist.neighbor + metric : callable, array, + a function that takes an argument (data) and returns the all-pairs + distances/dissimilarity between observations. return_distance: bool - Whether to return the distance from the observation to its nearest - cluster in feature space. If True, the tuple of (nearest_label, dissim) - is returned. If False, only the nearest_label array is returned. + Whether to return the distance from the observation to its nearest + cluster in feature space. If True, the tuple of (nearest_label, dissim) + is returned. If False, only the nearest_label array is returned. keep_self: bool - whether to allow observations to use their current cluster as their - nearest label. If True, an observation's existing cluster assignment can - also be the cluster it is closest to. If False, an observation's existing - cluster assignment cannot be the cluster it is closest to. This would mean - the function computes the nearest *alternative* cluster. + whether to allow observations to use their current cluster as their + nearest label. If True, an observation's existing cluster assignment can + also be the cluster it is closest to. If False, an observation's existing + cluster assignment cannot be the cluster it is closest to. This would mean + the function computes the nearest *alternative* cluster. 
Returns ------- @@ -539,7 +546,6 @@ def nearest_label( nearest_label_dissim = np.empty(labels.shape) for label in unique_labels: this_label_mask = labels == label - n_in_label = this_label_mask.sum() this_label_mask = np.nonzero(this_label_mask)[0] next_best_fit = np.ones(this_label_mask.shape) * np.inf next_best_label = np.empty(this_label_mask.shape, dtype=labels.dtype) diff --git a/esda/smaup.py b/esda/smaup.py index 127b4ed7..ba3f1647 100644 --- a/esda/smaup.py +++ b/esda/smaup.py @@ -13,7 +13,8 @@ class Smaup(object): - """S-maup: Statistical Test to Measure the Sensitivity to the Modifiable Areal Unit Problem + """S-maup: Statistical Test to Measure the Sensitivity + to the Modifiable Areal Unit Problem Parameters ---------- diff --git a/esda/smoothing.py b/esda/smoothing.py index 2f312e53..599fb1cc 100644 --- a/esda/smoothing.py +++ b/esda/smoothing.py @@ -1,4 +1,5 @@ from __future__ import division + """ Apply smoothing to rate computation @@ -12,24 +13,59 @@ """ -__author__ = "Myunghwa Hwang , David Folch , Luc Anselin , Serge Rey , " + "David Folch , " + "Luc Anselin , " + "Serge Rey = cumsum_threshold).nonzero()[0][0] if reordered_w[median_inx] == cumsum_threshold and len(d) - 1 > median_inx: - return np.sort(d)[median_inx:median_inx + 2].mean() + return np.sort(d)[median_inx : median_inx + 2].mean() # noqa E203 return np.sort(d)[median_inx] @@ -155,9 +191,9 @@ def sum_by_n(d, w, n): """ t = len(d) - h = t // n #must be floor! + h = t // n # must be floor! d = d * w - return np.array([sum(d[i: i + h]) for i in range(0, t, h)]) + return np.array([sum(d[i : i + h]) for i in range(0, t, h)]) # noqa E203 def crude_age_standardization(e, b, n): @@ -165,12 +201,13 @@ def crude_age_standardization(e, b, n): Parameters ---------- - e : array - (n*h, 1), event variable measured for each age group across n spatial units - b : array - (n*h, 1), population at risk variable measured for each age group across n spatial units - n : integer - the number of spatial units + e : array + (n*h, 1), event variable measured for each age group across n spatial units + b : array + (n*h, 1), population at risk variable measured for each age + group across n spatial units + n : integer + the number of spatial units Notes ----- @@ -218,16 +255,17 @@ def direct_age_standardization(e, b, s, n, alpha=0.05): Parameters ---------- - e : array - (n*h, 1), event variable measured for each age group across n spatial units - b : array - (n*h, 1), population at risk variable measured for each age group across n spatial units - s : array - (n*h, 1), standard population for each age group across n spatial units - n : integer - the number of spatial units - alpha : float - significance level for confidence interval + e : array + (n*h, 1), event variable measured for each age group across n spatial units + b : array + (n*h, 1), population at risk variable measured for each + age group across n spatial units + s : array + (n*h, 1), standard population for each age group across n spatial units + n : integer + the number of spatial units + alpha : float + significance level for confidence interval Notes ----- @@ -256,10 +294,11 @@ def direct_age_standardization(e, b, s, n, alpha=0.05): >>> b = np.array([1000, 1000, 1100, 900, 1000, 900, 1100, 900]) For direct age standardization, we also need the data for standard population. - Standard population is a reference population-at-risk (e.g., population distribution for the U.S.) 
- whose age distribution can be used as a benchmarking point for comparing age distributions - across regions (e.g., population distribution for Arizona and California). - Another array including standard population is created. + Standard population is a reference population-at-risk (e.g., population + distribution for the U.S.) whose age distribution can be used as a benchmarking + point for comparing age distributions across regions (e.g., population + distribution for Arizona and California). Another array including standard + population is created. >>> s = np.array([1000, 900, 1000, 900, 1000, 900, 1000, 900]) @@ -281,11 +320,11 @@ def direct_age_standardization(e, b, s, n, alpha=0.05): var_estimate = sum_by_n(e, np.square(age_weight), n) g_a = np.square(adjusted_r) / var_estimate g_b = var_estimate / adjusted_r - k = [age_weight[i:i + len(b) // n].max() for i in range(0, len(b), - len(b) // n)] + _b = len(b) + _rb = range(0, _b, _b // n) + k = [age_weight[i : i + _b // n].max() for i in _rb] # noqa E203 g_a_k = np.square(adjusted_r + k) / (var_estimate + np.square(k)) g_b_k = (var_estimate + np.square(k)) / (adjusted_r + k) - summed_b = sum_by_n(b, 1.0, n) res = [] for i in range(len(adjusted_r)): if adjusted_r[i] == 0: @@ -303,18 +342,21 @@ def indirect_age_standardization(e, b, s_e, s_b, n, alpha=0.05): Parameters ---------- - e : array - (n*h, 1), event variable measured for each age group across n spatial units - b : array - (n*h, 1), population at risk variable measured for each age group across n spatial units - s_e : array - (n*h, 1), event variable measured for each age group across n spatial units in a standard population - s_b : array - (n*h, 1), population variable measured for each age group across n spatial units in a standard population - n : integer - the number of spatial units - alpha : float - significance level for confidence interval + e : array + (n*h, 1), event variable measured for each age group across n spatial units + b : array + (n*h, 1), population at risk variable measured for each age + group across n spatial units + s_e : array + (n*h, 1), event variable measured for each age group across + n spatial units in a standard population + s_b : array + (n*h, 1), population variable measured for each age group across + n spatial units in a standard population + n : integer + the number of spatial units + alpha : float + significance level for confidence interval Notes ----- @@ -342,13 +384,14 @@ def indirect_age_standardization(e, b, s_e, s_b, n, alpha=0.05): >>> b = np.array([100, 100, 110, 90, 100, 90, 110, 90]) - For indirect age standardization, we also need the data for standard population and event. - Standard population is a reference population-at-risk (e.g., population distribution for the U.S.) - whose age distribution can be used as a benchmarking point for comparing age distributions - across regions (e.g., popoulation distribution for Arizona and California). - When the same concept is applied to the event variable, - we call it standard event (e.g., the number of cancer patients in the U.S.). - Two additional arrays including standard population and event are created. + For indirect age standardization, we also need the data for standard population + and event. Standard population is a reference population-at-risk (e.g., + population distribution for the U.S.) whose age distribution can be used as a + benchmarking point for comparing age distributions across regions (e.g., + popoulation distribution for Arizona and California). 
When the same concept is + applied to the event variable, we call it standard event (e.g., the number of + cancer patients in the U.S.). Two additional arrays including standard population + and event are created. >>> s_e = np.array([100, 45, 120, 100, 50, 30, 200, 80]) >>> s_b = np.array([1000, 900, 1000, 900, 1000, 900, 1000, 900]) @@ -384,16 +427,19 @@ def standardized_mortality_ratio(e, b, s_e, s_b, n): Parameters ---------- - e : array - (n*h, 1), event variable measured for each age group across n spatial units - b : array - (n*h, 1), population at risk variable measured for each age group across n spatial units - s_e : array - (n*h, 1), event variable measured for each age group across n spatial units in a standard population - s_b : array - (n*h, 1), population variable measured for each age group across n spatial units in a standard population - n : integer - the number of spatial units + e : array + (n*h, 1), event variable measured for each age group across n spatial units + b : array + (n*h, 1), population at risk variable measured for each age + group across n spatial units + s_e : array + (n*h, 1), event variable measured for each age group across + n spatial units in a standard population + s_b : array + (n*h, 1), population variable measured for each age group + across n spatial units in a standard population + n : integer + the number of spatial units Notes ----- @@ -452,14 +498,14 @@ def choynowski(e, b, n, threshold=None): Parameters ---------- - e : array(n*h, 1) - event variable measured for each age group across n spatial units - b : array(n*h, 1) - population at risk variable measured for each age group across n spatial units - n : integer - the number of spatial units - threshold : float - Returns zero for any p-value greater than threshold + e : array(n*h, 1) + event variable measured for each age group across n spatial units + b : array(n*h, 1) + population at risk variable measured for each age group across n spatial units + n : integer + the number of spatial units + threshold : float + Returns zero for any p-value greater than threshold Notes ----- @@ -563,6 +609,7 @@ def assuncao_rate(e, b): ebi_v = ebi_a + ebi_b / b return (y - ebi_b) / np.sqrt(ebi_v) + class _Smoother(object): """ This is a helper class that implements things that all smoothers should do. @@ -572,11 +619,12 @@ class _Smoother(object): maybe headbanging triples), since they're literally only inits + one attribute. """ + def __init__(self): pass @classmethod - def by_col(cls, df, e,b, inplace=False, **kwargs): + def by_col(cls, df, e, b, inplace=False, **kwargs): """ Compute smoothing by columns in a dataframe. @@ -616,16 +664,19 @@ def by_col(cls, df, e,b, inplace=False, **kwargs): try: assert len(e) == len(b) except AssertionError: - raise ValueError('There is no one-to-one mapping between event' - ' variable and population at risk variable!') - for ei, bi in zip(e,b): + raise ValueError( + "There is no one-to-one mapping between event" + " variable and population at risk variable!" 
+ ) + for ei, bi in zip(e, b): ename = ei bname = bi ei = df[ename] bi = df[bname] - outcol = '_'.join(('-'.join((ename, bname)), cls.__name__.lower())) + outcol = "_".join(("-".join((ename, bname)), cls.__name__.lower())) df[outcol] = cls(ei, bi, **kwargs).r + class Excess_Risk(_Smoother): """Excess Risk @@ -649,8 +700,8 @@ class Excess_Risk(_Smoother): >>> import libpysal >>> stl = libpysal.io.open(libpysal.examples.get_path('stl_hom.csv'), 'r') - The 11th and 14th columns in stl_hom.csv includes the number of homocides and population. - Creating two arrays from these columns. + The 11th and 14th columns in stl_hom.csv includes the number + of homocides and population. Creating two arrays from these columns. >>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13]) @@ -658,7 +709,8 @@ class Excess_Risk(_Smoother): >>> er = Excess_Risk(stl_e, stl_b) - Extracting the excess risk values through the property r of the Excess_Risk instance, er + Extracting the excess risk values through + the property r of the Excess_Risk instance, er >>> er.r[:10] array([[0.20665681], @@ -673,9 +725,10 @@ class Excess_Risk(_Smoother): [0.25821905]]) """ + def __init__(self, e, b): - e = np.asarray(e).reshape(-1,1) - b = np.asarray(b).reshape(-1,1) + e = np.asarray(e).reshape(-1, 1) + b = np.asarray(b).reshape(-1, 1) r_mean = e.sum() * 1.0 / b.sum() self.r = e * 1.0 / (b * r_mean) @@ -705,8 +758,8 @@ class Empirical_Bayes(_Smoother): >>> stl_ex = libpysal.examples.load_example('stl') >>> stl = libpysal.io.open(stl_ex.get_path('stl_hom.csv'), 'r') - The 11th and 14th columns in stl_hom.csv includes the number of homocides and population. - Creating two arrays from these columns. + The 11th and 14th columns in stl_hom.csv includes + the number of homocides and population. Creating two arrays from these columns. >>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13]) @@ -714,7 +767,8 @@ class Empirical_Bayes(_Smoother): >>> eb = Empirical_Bayes(stl_e, stl_b) - Extracting the risk values through the property r of the Empirical_Bayes instance, eb + Extracting the risk values through the property + r of the Empirical_Bayes instance, eb >>> eb.r[:10] array([[2.36718950e-05], @@ -729,9 +783,10 @@ class Empirical_Bayes(_Smoother): [3.02748380e-05]]) """ + def __init__(self, e, b): - e = np.asarray(e).reshape(-1,1) - b = np.asarray(b).reshape(-1,1) + e = np.asarray(e).reshape(-1, 1) + b = np.asarray(b).reshape(-1, 1) e_sum, b_sum = e.sum() * 1.0, b.sum() * 1.0 r_mean = e_sum / b_sum rate = e * 1.0 / b @@ -742,6 +797,7 @@ def __init__(self, e, b): weight = r_var / (r_var + r_mean / b) self.r = weight * rate + (1.0 - weight) * r_mean + class _Spatial_Smoother(_Smoother): """ This is a helper class that implements things that all the things that @@ -753,11 +809,12 @@ class _Spatial_Smoother(_Smoother): maybe headbanging triples), since they're literally only inits + one attribute. """ + def __init__(self): pass @classmethod - def by_col(cls, df, e,b, w=None, inplace=False, **kwargs): + def by_col(cls, df, e, b, w=None, inplace=False, **kwargs): """ Compute smoothing by columns in a dataframe. @@ -804,9 +861,11 @@ def by_col(cls, df, e,b, w=None, inplace=False, **kwargs): if isinstance(w, W): found = True if not found: - raise Exception('Weights not provided and no weights attached to frame!' - ' Please provide a weight or attach a weight to the' - ' dataframe') + raise Exception( + "Weights not provided and no weights attached to frame!" 
+ " Please provide a weight or attach a weight to the" + " dataframe" + ) if isinstance(w, W): w = [w] * len(e) if len(b) == 1 and len(e) > 1: @@ -814,16 +873,19 @@ def by_col(cls, df, e,b, w=None, inplace=False, **kwargs): try: assert len(e) == len(b) except AssertionError: - raise ValueError('There is no one-to-one mapping between event' - ' variable and population at risk variable!') + raise ValueError( + "There is no one-to-one mapping between event" + " variable and population at risk variable!" + ) for ei, bi, wi in zip(e, b, w): ename = ei bname = bi ei = df[ename] bi = df[bname] - outcol = '_'.join(('-'.join((ename, bname)), cls.__name__.lower())) + outcol = "_".join(("-".join((ename, bname)), cls.__name__.lower())) df[outcol] = cls(ei, bi, w=wi, **kwargs).r + class Spatial_Empirical_Bayes(_Spatial_Smoother): """Spatial Empirical Bayes Smoothing @@ -850,8 +912,8 @@ class Spatial_Empirical_Bayes(_Spatial_Smoother): >>> stl_ex = libpysal.examples.load_example('stl') >>> stl = libpysal.io.open(stl_ex.get_path('stl_hom.csv'), 'r') - The 11th and 14th columns in stl_hom.csv includes the number of homocides and population. - Creating two arrays from these columns. + The 11th and 14th columns in stl_hom.csv includes the number + of homocides and population. Creating two arrays from these columns. >>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13]) @@ -860,11 +922,13 @@ class Spatial_Empirical_Bayes(_Spatial_Smoother): >>> stl_w = libpysal.io.open(stl_ex.get_path('stl.gal'), 'r').read() Ensuring that the elements in the spatial weights instance are ordered - by the given sequential numbers from 1 to the number of observations in stl_hom.csv + by the given sequential numbers from 1 to the number of observations + in stl_hom.csv >>> if not stl_w.id_order_set: stl_w.id_order = range(1,len(stl) + 1) - Creating an instance of Spatial_Empirical_Bayes class using stl_e, stl_b, and stl_w + Creating an instance of Spatial_Empirical_Bayes class + using stl_e, stl_b, and stl_w >>> from esda.smoothing import Spatial_Empirical_Bayes >>> s_eb = Spatial_Empirical_Bayes(stl_e, stl_b, stl_w) @@ -883,14 +947,18 @@ class Spatial_Empirical_Bayes(_Spatial_Smoother): [3.73034109e-05], [3.47270722e-05]]) """ + def __init__(self, e, b, w): if not w.id_order_set: - raise ValueError("w id_order must be set to align with the order of e an b") - e = np.asarray(e).reshape(-1,1) - b = np.asarray(b).reshape(-1,1) + raise ValueError( + "The `id_order` of `w` must be set to " + "align with the order of `e` and `b`" + ) + e = np.asarray(e).reshape(-1, 1) + b = np.asarray(b).reshape(-1, 1) r_mean = Spatial_Rate(e, b, w).r rate = e * 1.0 / b - r_var_left = np.ones_like(e) * 1. + r_var_left = np.ones_like(e) * 1.0 ngh_num = np.ones_like(e) bi = slag(w, b) + b for i, idv in enumerate(w.id_order): @@ -905,6 +973,7 @@ def __init__(self, e, b, w): r_var[r_var < 0] = 0.0 self.r = r_mean + (rate - r_mean) * (r_var / (r_var + (r_mean / b))) + class Spatial_Rate(_Spatial_Smoother): """Spatial Rate Smoothing @@ -931,8 +1000,8 @@ class Spatial_Rate(_Spatial_Smoother): >>> stl_ex = libpysal.examples.load_example('stl') >>> stl = libpysal.io.open(stl_ex.get_path('stl_hom.csv'), 'r') - The 11th and 14th columns in stl_hom.csv includes the number of homocides and population. - Creating two arrays from these columns. + The 11th and 14th columns in stl_hom.csv includes the + number of homocides and population. Creating two arrays from these columns. 
>>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13]) @@ -941,7 +1010,8 @@ class Spatial_Rate(_Spatial_Smoother): >>> stl_w = libpysal.io.open(stl_ex.get_path('stl.gal'), 'r').read() Ensuring that the elements in the spatial weights instance are ordered - by the given sequential numbers from 1 to the number of observations in stl_hom.csv + by the given sequential numbers from 1 to the number + of observations in stl_hom.csv >>> if not stl_w.id_order_set: stl_w.id_order = range(1,len(stl) + 1) @@ -964,16 +1034,19 @@ class Spatial_Rate(_Spatial_Smoother): [4.26204928e-05], [3.47270722e-05]]) """ + def __init__(self, e, b, w): if not w.id_order_set: - raise ValueError("w id_order must be set to align with the order of e and b") + raise ValueError( + "w id_order must be set to align with the order of e and b" + ) else: - e = np.asarray(e).reshape(-1,1) - b = np.asarray(b).reshape(-1,1) - w.transform = 'b' + e = np.asarray(e).reshape(-1, 1) + b = np.asarray(b).reshape(-1, 1) + w.transform = "b" w_e, w_b = slag(w, e), slag(w, b) self.r = (e + w_e) / (b + w_b) - w.transform = 'o' + w.transform = "o" class Kernel_Smoother(_Spatial_Smoother): @@ -1020,7 +1093,8 @@ class Kernel_Smoother(_Spatial_Smoother): >>> kr = Kernel_Smoother(e, b, kw) - Extracting the smoothed rates through the property r of the Kernel_Smoother instance + Extracting the smoothed rates through the + property r of the Kernel_Smoother instance >>> kr.r array([[0.10543301], @@ -1030,14 +1104,17 @@ class Kernel_Smoother(_Spatial_Smoother): [0.04756872], [0.04845298]]) """ + def __init__(self, e, b, w): if type(w) != Kernel: - raise Error('w must be an instance of Kernel weights') + raise ValueError("w must be an instance of Kernel weights") if not w.id_order_set: - raise ValueError("w id_order must be set to align with the order of e and b") + raise ValueError( + "w id_order must be set to align with the order of e and b" + ) else: - e = np.asarray(e).reshape(-1,1) - b = np.asarray(b).reshape(-1,1) + e = np.asarray(e).reshape(-1, 1) + b = np.asarray(b).reshape(-1, 1) w_e, w_b = slag(w, e), slag(w, b) self.r = w_e / w_b @@ -1047,22 +1124,23 @@ class Age_Adjusted_Smoother(_Spatial_Smoother): Parameters ---------- - e : array (n*h, 1) - event variable measured for each age group across n spatial units - b : array (n*h, 1) - population at risk variable measured for each age group across n spatial units - w : spatial weights instance - s : array (n*h, 1) - standard population for each age group across n spatial units + e : array (n*h, 1) + event variable measured for each age group across n spatial units + b : array (n*h, 1) + population at risk variable measured for each age group across n spatial units + w : spatial weights instance + s : array (n*h, 1) + standard population for each age group across n spatial units Attributes ---------- - r : array (n, 1) - rate values from spatial rate smoothing + r : array (n, 1) + rate values from spatial rate smoothing Notes ----- - Weights used to smooth age-specific events and populations are simple binary weights + Weights used to smooth age-specific events + and populations are simple binary weights Examples -------- @@ -1097,34 +1175,36 @@ class Age_Adjusted_Smoother(_Spatial_Smoother): >>> ar = Age_Adjusted_Smoother(e, b, kw, s) - Extracting the smoothed rates through the property r of the Age_Adjusted_Smoother instance + Extracting the smoothed rates through the property r of + the Age_Adjusted_Smoother instance >>> ar.r array([0.10519625, 0.08494318, 0.06440072, 0.06898604, 
0.06952076, 0.05020968]) """ + def __init__(self, e, b, w, s, alpha=0.05): e = np.asarray(e).reshape(-1, 1) b = np.asarray(b).reshape(-1, 1) s = np.asarray(s).flatten() t = len(e) h = t // w.n - w.transform = 'b' + w.transform = "b" e_n, b_n = [], [] for i in range(h): e_n.append(slag(w, e[i::h]).tolist()) b_n.append(slag(w, b[i::h]).tolist()) - e_n = np.array(e_n).reshape((1, t), order='F')[0] - b_n = np.array(b_n).reshape((1, t), order='F')[0] + e_n = np.array(e_n).reshape((1, t), order="F")[0] + b_n = np.array(b_n).reshape((1, t), order="F")[0] e_n = e_n.reshape(s.shape) b_n = b_n.reshape(s.shape) r = direct_age_standardization(e_n, b_n, s, w.n, alpha=alpha) self.r = np.array([i[0] for i in r]) - w.transform = 'o' + w.transform = "o" - @_requires('pandas') + @_requires("pandas") @classmethod - def by_col(cls, df, e,b, w=None, s=None, **kwargs): + def by_col(cls, df, e, b, w=None, s=None, **kwargs): """ Compute smoothing by columns in a dataframe. @@ -1162,6 +1242,7 @@ def by_col(cls, df, e,b, w=None, s=None, **kwargs): if s is None: raise Exception('Standard population variable "s" must be supplied.') import pandas as pd + if isinstance(e, str): e = [e] if isinstance(b, str): @@ -1176,14 +1257,17 @@ def by_col(cls, df, e,b, w=None, s=None, **kwargs): found = True break if not found: - raise Exception('Weights not provided and no weights attached to frame!' - ' Please provide a weight or attach a weight to the' - ' dataframe.') + raise Exception( + "Weights not provided and no weights attached to frame!" + " Please provide a weight or attach a weight to the" + " dataframe." + ) if isinstance(w, W): w = [w] * len(e) if not all(isinstance(wi, W) for wi in w): - raise Exception('Weights object must be an instance of ' - ' libpysal.weights.W!') + raise Exception( + "Weights object must be an instance of " " libpysal.weights.W!" + ) b = b * len(e) if len(b) == 1 and len(e) > 1 else b s = s * len(e) if len(s) == 1 and len(e) > 1 else s try: @@ -1191,17 +1275,18 @@ def by_col(cls, df, e,b, w=None, s=None, **kwargs): assert len(e) == len(s) assert len(e) == len(w) except AssertionError: - raise ValueError('There is no one-to-one mapping between event' - ' variable and population at risk variable, and ' - ' standard population variable, and spatial ' - ' weights!') + raise ValueError( + "There is no one-to-one mapping between event" + " variable and population at risk variable, and " + " standard population variable, and spatial " + " weights!" + ) rdf = [] max_len = 0 for ei, bi, wi, si in zip(e, b, w, s): ename = ei bname = bi - h = len(ei) // wi.n - outcol = '_'.join(('-'.join((ename, bname)), cls.__name__.lower())) + outcol = "_".join(("-".join((ename, bname)), cls.__name__.lower())) this_r = cls(df[ei], df[bi], w=wi, s=df[si], **kwargs).r max_len = 0 if len(this_r) > max_len else max_len rdf.append((outcol, this_r.tolist())) @@ -1238,8 +1323,8 @@ class Disk_Smoother(_Spatial_Smoother): >>> stl_ex = libpysal.examples.load_example('stl') >>> stl = libpysal.io.open(stl_ex.get_path('stl_hom.csv'), 'r') - The 11th and 14th columns in stl_hom.csv includes the number of homocides and population. - Creating two arrays from these columns. + The 11th and 14th columns in stl_hom.csv includes the + number of homocides and population. Creating two arrays from these columns. 
>>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13]) @@ -1248,7 +1333,8 @@ class Disk_Smoother(_Spatial_Smoother): >>> stl_w = libpysal.io.open(stl_ex.get_path('stl.gal'), 'r').read() Ensuring that the elements in the spatial weights instance are ordered - by the given sequential numbers from 1 to the number of observations in stl_hom.csv + by the given sequential numbers from 1 to the number of + observations in stl_hom.csv >>> if not stl_w.id_order_set: stl_w.id_order = range(1,len(stl) + 1) @@ -1274,15 +1360,17 @@ class Disk_Smoother(_Spatial_Smoother): def __init__(self, e, b, w): if not w.id_order_set: - raise ValueError("w id_order must be set to align with the order of e and b") + raise ValueError( + "w id_order must be set to align with the order of e and b" + ) else: - e = np.asarray(e).reshape(-1,1) - b = np.asarray(b).reshape(-1,1) + e = np.asarray(e).reshape(-1, 1) + b = np.asarray(b).reshape(-1, 1) r = e * 1.0 / b weight_sum = [] for i in w.id_order: weight_sum.append(sum(w.weights[i])) - self.r = slag(w, r) / np.array(weight_sum).reshape(-1,1) + self.r = slag(w, r) / np.array(weight_sum).reshape(-1, 1) class Spatial_Median_Rate(_Spatial_Smoother): @@ -1318,8 +1406,8 @@ class Spatial_Median_Rate(_Spatial_Smoother): >>> stl_ex = libpysal.examples.load_example('stl') >>> stl = libpysal.io.open(stl_ex.get_path('stl_hom.csv'), 'r') - The 11th and 14th columns in stl_hom.csv includes the number of homocides and population. - Creating two arrays from these columns. + The 11th and 14th columns in stl_hom.csv includes the number of + homocides and population. Creating two arrays from these columns. >>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13]) @@ -1328,7 +1416,8 @@ class Spatial_Median_Rate(_Spatial_Smoother): >>> stl_w = libpysal.io.open(stl_ex.get_path('stl.gal'), 'r').read() Ensuring that the elements in the spatial weights instance are ordered - by the given sequential numbers from 1 to the number of observations in stl_hom.csv + by the given sequential numbers from 1 to the number of + observations in stl_hom.csv >>> if not stl_w.id_order_set: stl_w.id_order = range(1,len(stl) + 1) @@ -1336,7 +1425,8 @@ class Spatial_Median_Rate(_Spatial_Smoother): >>> smr0 = Spatial_Median_Rate(stl_e,stl_b,stl_w) - Extracting the computed rates through the property r of the Spatial_Median_Rate instance + Extracting the computed rates through the property + r of the Spatial_Median_Rate instance >>> smr0.r[:10] array([3.96047383e-05, 3.55386859e-05, 3.28308921e-05, 4.30731238e-05, @@ -1347,7 +1437,8 @@ class Spatial_Median_Rate(_Spatial_Smoother): >>> smr1 = Spatial_Median_Rate(stl_e,stl_b,stl_w,iteration=5) - Extracting the computed rates through the property r of the Spatial_Median_Rate instance + Extracting the computed rates through the property r + of the Spatial_Median_Rate instance >>> smr1.r[:10] array([3.11293620e-05, 2.95956330e-05, 3.11293620e-05, 3.10159267e-05, @@ -1359,7 +1450,8 @@ class Spatial_Median_Rate(_Spatial_Smoother): >>> smr2 = Spatial_Median_Rate(stl_e,stl_b,stl_w,aw=stl_b) - Extracting the computed rates through the property r of the Spatial_Median_Rate instance + Extracting the computed rates through the property + r of the Spatial_Median_Rate instance >>> smr2.r[:10] array([5.77412020e-05, 4.46449551e-05, 5.77412020e-05, 5.77412020e-05, @@ -1371,7 +1463,8 @@ class Spatial_Median_Rate(_Spatial_Smoother): >>> smr3 = Spatial_Median_Rate(stl_e,stl_b,stl_w,aw=stl_b,iteration=5) - Extracting the computed rates through the property r of the 
Spatial_Median_Rate instance + Extracting the computed rates through the property + r of the Spatial_Median_Rate instance >>> smr3.r[:10] array([3.61363528e-05, 4.46449551e-05, 3.61363528e-05, 3.61363528e-05, @@ -1379,9 +1472,12 @@ class Spatial_Median_Rate(_Spatial_Smoother): 3.61363528e-05, 4.46449551e-05]) >>> """ + def __init__(self, e, b, w, aw=None, iteration=1): if not w.id_order_set: - raise ValueError("w id_order must be set to align with the order of e and b") + raise ValueError( + "w id_order must be set to align with the order of e and b" + ) e = np.asarray(e).flatten() b = np.asarray(b).flatten() self.r = e * 1.0 / b @@ -1464,8 +1560,8 @@ class Spatial_Filtering(_Smoother): >>> bbox = [[-92.700676, 36.881809], [-87.916573, 40.3295669]] - The 11th and 14th columns in stl_hom.csv includes the number of homocides and population. - Creating two arrays from these columns. + The 11th and 14th columns in stl_hom.csv includes the number + of homocides and population. Creating two arrays from these columns. >>> stl_e, stl_b = np.array(stl[:,10]), np.array(stl[:,13]) @@ -1474,7 +1570,8 @@ class Spatial_Filtering(_Smoother): >>> sf_0 = Spatial_Filtering(bbox,d,stl_e,stl_b,10,10,r=2) - Extracting the resulting rates through the property r of the Spatial_Filtering instance + Extracting the resulting rates through the property + r of the Spatial_Filtering instance >>> sf_0.r[:10] array([4.23561763e-05, 4.45290850e-05, 4.56456221e-05, 4.49133384e-05, @@ -1491,7 +1588,8 @@ class Spatial_Filtering(_Smoother): >>> sf.r.shape (100,) - Extracting the resulting rates through the property r of the Spatial_Filtering instance + Extracting the resulting rates through the property + r of the Spatial_Filtering instance >>> sf.r[:10] array([3.73728738e-05, 4.04456300e-05, 4.04456300e-05, 3.81035327e-05, @@ -1500,13 +1598,15 @@ class Spatial_Filtering(_Smoother): """ def __init__(self, bbox, data, e, b, x_grid, y_grid, r=None, pop=None): - e= np.asarray(e).reshape(-1,1) - b= np.asarray(b).reshape(-1,1) + e = np.asarray(e).reshape(-1, 1) + b = np.asarray(b).reshape(-1, 1) data_tree = KDTree(data) x_range = bbox[1][0] - bbox[0][0] y_range = bbox[1][1] - bbox[0][1] - x, y = np.mgrid[bbox[0][0]:bbox[1][0]:float(x_range) / x_grid, - bbox[0][1]:bbox[1][1]:float(y_range) / y_grid] + x, y = np.mgrid[ + bbox[0][0] : bbox[1][0] : float(x_range) / x_grid, # noqa E203 + bbox[0][1] : bbox[1][1] : float(y_range) / y_grid, # noqa E203 + ] self.grid = list(zip(x.ravel(), y.ravel())) self.r = [] if r is None and pop is None: @@ -1528,9 +1628,9 @@ def __init__(self, bbox, data, e, b, x_grid, y_grid, r=None, pop=None): self.r.append(e_n_f[-1] * 1.0 / b_n_f[-1]) self.r = np.array(self.r) - @_requires('pandas') + @_requires("pandas") @classmethod - def by_col(cls, df, e, b, x_grid, y_grid, geom_col='geometry', **kwargs): + def by_col(cls, df, e, b, x_grid, y_grid, geom_col="geometry", **kwargs): """ Compute smoothing by columns in a dataframe. The bounding box and point information is computed from the geometry column. @@ -1562,6 +1662,7 @@ def by_col(cls, df, e, b, x_grid, y_grid, geom_col='geometry', **kwargs): cells. 
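As a compact restatement of the moving-window logic used above: rates are evaluated at grid points rather than at the observations themselves, pooling the events and populations that fall inside a search window around each grid point. The sketch below covers the radius-based variant only and is not the class implementation; scipy's KDTree, the linspace grid, the helper name moving_window_rates, and the assumption that every window holds at least one observation are all illustrative.

import numpy as np
from scipy.spatial import KDTree


def moving_window_rates(bbox, data, e, b, x_grid, y_grid, r):
    e, b = np.asarray(e).ravel(), np.asarray(b).ravel()
    # regular grid of evaluation points spanning the bounding box
    (xmin, ymin), (xmax, ymax) = bbox
    xs = np.linspace(xmin, xmax, x_grid)
    ys = np.linspace(ymin, ymax, y_grid)
    grid = np.array([(gx, gy) for gx in xs for gy in ys])
    tree = KDTree(data)
    rates = np.empty(len(grid))
    for i, point in enumerate(grid):
        inside = tree.query_ball_point(point, r)  # observations in the window
        rates[i] = e[inside].sum() / b[inside].sum()  # pooled rate at this point
    return grid, rates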
""" import pandas as pd + # prep for application over multiple event/population pairs if isinstance(e, str): e = [e] @@ -1580,16 +1681,18 @@ def by_col(cls, df, e, b, x_grid, y_grid, geom_col='geometry', **kwargs): res = [] for ename, bname, xgi, ygi in zip(e, b, x_grid, y_grid): r = cls(bbox, data, df[ename], df[bname], xgi, ygi, **kwargs) - grid = np.asarray(r.grid).reshape(-1,2) - name = '_'.join(('-'.join((ename, bname)), cls.__name__.lower())) - colnames = ('_'.join((name, suffix)) for suffix in ['X', 'Y', 'R']) - items = [(name, col) for name,col in zip(colnames, [grid[:,0], - grid[:,1], - r.r])] + grid = np.asarray(r.grid).reshape(-1, 2) + name = "_".join(("-".join((ename, bname)), cls.__name__.lower())) + colnames = ("_".join((name, suffix)) for suffix in ["X", "Y", "R"]) + items = [ + (name, col) + for name, col in zip(colnames, [grid[:, 0], grid[:, 1], r.r]) + ] res.append(pd.DataFrame.from_dict(dict(items))) outdf = pd.concat(res) return outdf + class Headbanging_Triples(object): """Generate a pseudo spatial weights instance that contains headbanging triples @@ -1614,7 +1717,8 @@ class Headbanging_Triples(object): key is observation record id, value is a list of the following: tuple of original triple observations distance between original triple observations - distance between an original triple observation and its extrapolated point + distance between an original triple + observation and its extrapolated point Examples -------- @@ -1626,7 +1730,9 @@ class Headbanging_Triples(object): Reading data in stl_hom.csv into stl_db to extract values for event and population-at-risk variables - >>> stl_db = libpysal.io.open(libpysal.examples.get_path('stl_hom.csv'),'r') # doctest: +SKIP + >>> stl_db = libpysal.io.open( + ... libpysal.examples.get_path('stl_hom.csv'),'r' + ... ) # doctest: +SKIP Reading the stl data in the WKT format so that we can easily extract polygon centroids @@ -1665,7 +1771,9 @@ class Headbanging_Triples(object): Opening sids2.shp file >>> import libpysal - >>> sids = libpysal.io.open(libpysal.examples.get_path('sids2.shp'),'r') # doctest: +SKIP + >>> sids = libpysal.io.open( + ... libpysal.examples.get_path('sids2.shp'),'r' + ... ) # doctest: +SKIP Extracting the centroids of polygons in the sids data @@ -1720,17 +1828,20 @@ class Headbanging_Triples(object): >>> round(extrapolated[1],5), round(extrapolated[2],6) # doctest: +SKIP (0.33753, 0.302707) """ + def __init__(self, data, w, k=5, t=3, angle=135.0, edgecor=False): - raise DeprecationWarning('Deprecated') + raise DeprecationWarning("Deprecated") if k < 3: - raise ValueError("w should be NeareastNeighbors instance & the number of neighbors should be more than 3.") + raise ValueError( + "`w` should be a `NeareastNeighbors` instance & the number " + "of neighbors should be more than 3." 
+ ) if not w.id_order_set: raise ValueError("w id_order must be set to align with the order of data") self.triples, points = {}, {} for i, pnt in enumerate(data): ng = w.neighbor_offsets[i] - points[(i, Point(pnt))] = dict(list(zip(ng, [Point(d) - for d in data[ng]]))) + points[(i, Point(pnt))] = dict(list(zip(ng, [Point(d) for d in data[ng]]))) for i, pnt in list(points.keys()): ng = points[(i, pnt)] tr, tr_dis = {}, [] @@ -1742,8 +1853,8 @@ def __init__(self, data, w, k=5, t=3, angle=135.0, edgecor=False): if len(tr) > t: for c in list(tr.keys()): p2, p3 = tr[c] - tr_dis.append((get_segment_point_dist( - LineSegment(p2, p3), pnt), c)) + _seg = LineSegment(p2, p3) + tr_dis.append((get_segment_point_dist(_seg, pnt), c)) tr_dis = sorted(tr_dis)[:t] self.triples[i] = [trp for dis, trp in tr_dis] else: @@ -1768,15 +1879,27 @@ def __init__(self, data, w, k=5, t=3, angle=135.0, edgecor=False): dist_p_p2 = get_points_dist(point, p2) dist_p_p3 = get_points_dist(point, p3) if dist_p_p2 <= dist_p_p3: - ray1, ray2, s_pnt, dist, c = Ray(p2, point), Ray(p2, p3), p2, dist_p_p2, (ps[p2], ps[p3]) + ray1, ray2, s_pnt, dist, c = ( + Ray(p2, point), + Ray(p2, p3), + p2, + dist_p_p2, + (ps[p2], ps[p3]), + ) else: - ray1, ray2, s_pnt, dist, c = Ray(p3, point), Ray(p3, p2), p3, dist_p_p3, (ps[p3], ps[p2]) + ray1, ray2, s_pnt, dist, c = ( + Ray(p3, point), + Ray(p3, p2), + p3, + dist_p_p3, + (ps[p3], ps[p2]), + ) ang = get_angle_between(ray1, ray2) - if ang >= 90 + angle / 2 or (ang < 0 and ang + 360 >= 90 + angle / 2): - ex_point = get_point_at_angle_and_dist( - ray1, angle, dist) - extra = [c, dist_p2_p3, get_points_dist( - s_pnt, ex_point)] + if ang >= 90 + angle / 2 or ( + ang < 0 and ang + 360 >= 90 + angle / 2 + ): + ex_point = get_point_at_angle_and_dist(ray1, angle, dist) + extra = [c, dist_p2_p3, get_points_dist(s_pnt, ex_point)] break self.triples[ps[point]].append(extra[0]) self.extra[ps[point]] = extra @@ -1805,109 +1928,110 @@ class Headbanging_Median_Rate(object): Examples -------- - >>> import libpysal # doctest: +SKIP - + >>> import libpysal # doctest: +SKIP opening the sids2 shapefile - >>> sids = libpysal.io.open(libpysal.examples.get_path('sids2.shp'), 'r') # doctest: +SKIP - + >>> sids = libpysal.io.open( + ... libpysal.examples.get_path('sids2.shp'), 'r' + ... ) # doctest: +SKIP extracting the centroids of polygons in the sids2 data - >>> sids_d = np.array([i.centroid for i in sids]) # doctest: +SKIP - + >>> sids_d = np.array([i.centroid for i in sids]) # doctest: +SKIP creating a 5-nearest neighbors weights from the centroids - >>> sids_w = libpysal.weights.KNN(sids_d,k=5) # doctest: +SKIP - + >>> sids_w = libpysal.weights.KNN(sids_d,k=5) # doctest: +SKIP ensuring that the members in sids_w are ordered - >>> if not sids_w.id_order_set: sids_w.id_order = sids_w.id_order # doctest: +SKIP - + >>> if not sids_w.id_order_set: sids_w.id_order = sids_w.id_order # doctest: +SKIP finding headbanging triples by using 5 neighbors return outdf - >>> s_ht = Headbanging_Triples(sids_d,sids_w,k=5) # doctest: +SKIP + >>> s_ht = Headbanging_Triples(sids_d,sids_w,k=5) # doctest: +SKIP DeprecationWarning: Deprecated - reading in the sids2 data table - >>> sids_db = libpysal.io.open(libpysal.examples.get_path('sids2.dbf'), 'r') # doctest: +SKIP - + >>> sids_db = libpysal.io.open( + ... libpysal.examples.get_path('sids2.dbf'), 'r' + ... 
) # doctest: +SKIP extracting the 10th and 9th columns in the sids2.dbf and using data values as event and population-at-risk variables - >>> s_e, s_b = np.array(sids_db[:,9]), np.array(sids_db[:,8]) # doctest: +SKIP - + >>> s_e, s_b = np.array(sids_db[:,9]), np.array(sids_db[:,8]) # doctest: +SKIP computing headbanging median rates from s_e, s_b, and s_ht - >>> sids_hb_r = Headbanging_Median_Rate(s_e,s_b,s_ht) # doctest: +SKIP - + >>> sids_hb_r = Headbanging_Median_Rate(s_e,s_b,s_ht) # doctest: +SKIP - extracting the computed rates through the property r of the Headbanging_Median_Rate instance + extracting the computed rates through the property r of the + Headbanging_Median_Rate instance - >>> sids_hb_r.r[:5] # doctest: +SKIP + >>> sids_hb_r.r[:5] # doctest: +SKIP array([ 0.00075586, 0. , 0.0008285 , 0.0018315 , 0.00498891]) recomputing headbanging median rates with 5 iterations - >>> sids_hb_r2 = Headbanging_Median_Rate(s_e,s_b,s_ht,iteration=5) # doctest: +SKIP + >>> sids_hb_r2 = Headbanging_Median_Rate( + ... s_e,s_b,s_ht,iteration=5 + ... ) # doctest: +SKIP + extracting the computed rates through the property r of the + Headbanging_Median_Rate instance - extracting the computed rates through the property r of the Headbanging_Median_Rate instance - - >>> sids_hb_r2.r[:5] # doctest: +SKIP + >>> sids_hb_r2.r[:5] # doctest: +SKIP array([ 0.0008285 , 0.00084331, 0.00086896, 0.0018315 , 0.00498891]) recomputing headbanging median rates by considring a set of auxilliary weights - >>> sids_hb_r3 = Headbanging_Median_Rate(s_e,s_b,s_ht,aw=s_b) # doctest: +SKIP - + >>> sids_hb_r3 = Headbanging_Median_Rate(s_e,s_b,s_ht,aw=s_b) # doctest: +SKIP - extracting the computed rates through the property r of the Headbanging_Median_Rate instance + extracting the computed rates through the property r of the + Headbanging_Median_Rate instance - >>> sids_hb_r3.r[:5] # doctest: +SKIP + >>> sids_hb_r3.r[:5] # doctest: +SKIP array([ 0.00091659, 0. , 0.00156838, 0.0018315 , 0.00498891]) """ + def __init__(self, e, b, t, aw=None, iteration=1): - raise DeprecationWarning('Deprecated') + raise DeprecationWarning("Deprecated") self.r = e * 1.0 / b self.tr, self.aw = t.triples, aw - if hasattr(t, 'extra'): + if hasattr(t, "extra"): self.extra = t.extra while iteration: self.__search_headbanging_median() iteration -= 1 def __get_screens(self, id, triples, weighted=False): - r, tr = self.r, self.tr + r = self.r if len(triples) == 0: return r[id] - if hasattr(self, 'extra') and id in self.extra: + if hasattr(self, "extra") and id in self.extra: extra = self.extra trp_r = r[list(triples[0])] # observed rate # plus difference in rate scaled by ratio of extrapolated distance # & observed distance. 
trp_r[-1] = trp_r[0] + (trp_r[0] - trp_r[-1]) * ( - extra[id][-1] * 1.0 / extra[id][1]) + extra[id][-1] * 1.0 / extra[id][1] + ) trp_r = sorted(trp_r) if not weighted: return r[id], trp_r[0], trp_r[-1] else: trp_aw = self.aw[triples[0]] - extra_w = trp_aw[0] + (trp_aw[0] - trp_aw[- - 1]) * (extra[id][-1] * 1.0 / extra[id][1]) + extra_w = trp_aw[0] + (trp_aw[0] - trp_aw[-1]) * ( + extra[id][-1] * 1.0 / extra[id][1] + ) return r[id], trp_r[0], trp_r[-1], self.aw[id], trp_aw[0] + extra_w if not weighted: lowest, highest = [], [] @@ -1921,19 +2045,23 @@ def __get_screens(self, id, triples, weighted=False): lowest_aw, highest_aw = [], [] for trp in triples: trp_r = r[list(trp)] - dtype = [('r', '%s' % trp_r.dtype), ('w', - '%s' % self.aw.dtype)] + dtype = [("r", "%s" % trp_r.dtype), ("w", "%s" % self.aw.dtype)] trp_r = np.array(list(zip(trp_r, list(trp))), dtype=dtype) - trp_r.sort(order='r') - lowest.append(trp_r['r'][0]) - highest.append(trp_r['r'][-1]) - lowest_aw.append(self.aw[int(round(trp_r['w'][0]))]) - highest_aw.append(self.aw[int(round(trp_r['w'][-1]))]) + trp_r.sort(order="r") + lowest.append(trp_r["r"][0]) + highest.append(trp_r["r"][-1]) + lowest_aw.append(self.aw[int(round(trp_r["w"][0]))]) + highest_aw.append(self.aw[int(round(trp_r["w"][-1]))]) wm_lowest = weighted_median(np.array(lowest), np.array(lowest_aw)) - wm_highest = weighted_median( - np.array(highest), np.array(highest_aw)) + wm_highest = weighted_median(np.array(highest), np.array(highest_aw)) triple_members = flatten(triples, unique=False) - return r[id], wm_lowest, wm_highest, self.aw[id] * len(triples), self.aw[triple_members].sum() + return ( + r[id], + wm_lowest, + wm_highest, + self.aw[id] * len(triples), + self.aw[triple_members].sum(), + ) def __get_median_from_screens(self, screens): if isinstance(screens, float): @@ -1952,17 +2080,16 @@ def __get_median_from_screens(self, screens): return rk def __search_headbanging_median(self): - r, tr = self.r, self.tr + tr = self.tr new_r = [] for k in list(tr.keys()): - screens = self.__get_screens( - k, tr[k], weighted=(self.aw is not None)) + screens = self.__get_screens(k, tr[k], weighted=(self.aw is not None)) new_r.append(self.__get_median_from_screens(screens)) self.r = np.array(new_r) - @_requires('pandas') + @_requires("pandas") @classmethod - def by_col(cls, df, e, b, t=None, geom_col='geometry', inplace=False, **kwargs): + def by_col(cls, df, e, b, t=None, geom_col="geometry", inplace=False, **kwargs): """ Compute smoothing by columns in a dataframe. The bounding box and point information is computed from the geometry column. @@ -1996,12 +2123,12 @@ def by_col(cls, df, e, b, t=None, geom_col='geometry', inplace=False, **kwargs): event/population pairs. If done inplace, there is no return value and `df` is modified in place. 
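Because the screening logic of the deprecated class above is spread across several private methods, a compact, unweighted restatement may help readers of this diff: each unit's triples of bracketing rates yield a low screen and a high screen, and the smoothed value is the median of the unit's own rate and those two screens. The sketch assumes plain medians (no auxiliary weights, no edge correction) and an illustrative triples mapping; it is not the deprecated implementation itself.

import numpy as np


def headbang_once(rates, triples):
    # triples maps each index i to a list of (j, k) index pairs bracketing i,
    # e.g. as produced by a headbanging-triples search
    smoothed = np.asarray(rates, dtype=float).copy()
    for i, pairs in triples.items():
        if not pairs:
            continue  # no admissible triples: keep the observed rate
        lows = [min(rates[j], rates[k]) for j, k in pairs]
        highs = [max(rates[j], rates[k]) for j, k in pairs]
        low_screen, high_screen = np.median(lows), np.median(highs)
        smoothed[i] = np.median([low_screen, rates[i], high_screen])
    return smoothed


# repeating the map corresponds to the iteration argument of the class above:
# rates = headbang_once(headbang_once(rates, triples), triples)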
""" - import pandas as pd + if not inplace: new = df.copy() cls.by_col(new, e, b, t=t, geom_col=geom_col, inplace=True, **kwargs) return new - import pandas as pd + # prep for application over multiple event/population pairs if isinstance(e, str): e = [e] @@ -2012,10 +2139,10 @@ def by_col(cls, df, e, b, t=None, geom_col='geometry', inplace=False, **kwargs): data = get_points_array(df[geom_col]) - #Headbanging_Triples doesn't take **kwargs, so filter its arguments + # Headbanging_Triples doesn't take **kwargs, so filter its arguments # (self, data, w, k=5, t=3, angle=135.0, edgecor=False): - w = kwargs.pop('w', None) + w = kwargs.pop("w", None) if w is None: found = False for k in df._metadata: @@ -2023,20 +2150,20 @@ def by_col(cls, df, e, b, t=None, geom_col='geometry', inplace=False, **kwargs): if isinstance(w, W): found = True if not found: - raise Exception('Weights not provided and no weights attached to frame!' - ' Please provide a weight or attach a weight to the' - ' dataframe') + raise Exception( + "Weights not provided and no weights attached to frame!" + " Please provide a weight or attach a weight to the" + " dataframe" + ) - k = kwargs.pop('k', 5) - t = kwargs.pop('t', 3) - angle = kwargs.pop('angle', 135.0) - edgecor = kwargs.pop('edgecor', False) + k = kwargs.pop("k", 5) + t = kwargs.pop("t", 3) + angle = kwargs.pop("angle", 135.0) + edgecor = kwargs.pop("edgecor", False) - hbt = Headbanging_Triples(data, w, k=k, t=t, angle=angle, - edgecor=edgecor) + hbt = Headbanging_Triples(data, w, k=k, t=t, angle=angle, edgecor=edgecor) - res = [] for ename, bname in zip(e, b): r = cls(df[ename], df[bname], hbt, **kwargs).r - name = '_'.join(('-'.join((ename, bname)), cls.__name__.lower())) + name = "_".join(("-".join((ename, bname)), cls.__name__.lower())) df[name] = r diff --git a/esda/tabular.py b/esda/tabular.py index 3ba7231c..2dfdffa2 100644 --- a/esda/tabular.py +++ b/esda/tabular.py @@ -1,8 +1,10 @@ # from ...common import requires as _requires import itertools as _it + from libpysal.weights import W + # I would like to define it like this, so that you could make a call like: # Geary(df, 'HOVAL', 'INC', w=W), but this only works in Python3. So, I have to # use a workaround @@ -51,7 +53,7 @@ def _univariate_handler( **kwargs : optional keyword arguments options that are passed directly to the statistic """ - ### Preprocess + # Preprocess if not inplace: new_df = df.copy() _univariate_handler( @@ -77,7 +79,7 @@ def _univariate_handler( " Please provide a weight or attach a weight to the" " dataframe" ) - ### Prep indexes + # Prep indexes if outvals is None: outvals = [] outvals.insert(0, "_statistic") @@ -100,13 +102,13 @@ def _univariate_handler( if isinstance(cols, str): cols = [cols] - ### Make closure around weights & apply columnwise + # Make closure around weights & apply columnwise def column_stat(column): return stat(column.values, w=w, **kwargs) stat_objs = df[cols].apply(column_stat) - ### Assign into dataframe + # Assign into dataframe for col in cols: stat_obj = stat_objs[col] y = kwargs.get("y") diff --git a/esda/tests/test_adbscan.py b/esda/tests/test_adbscan.py index b2dafebb..8440012e 100644 --- a/esda/tests/test_adbscan.py +++ b/esda/tests/test_adbscan.py @@ -1,10 +1,11 @@ import unittest -import pandas -import numpy as np -from .. import adbscan +import numpy as np +import pandas import pytest +from .. 
import adbscan + class ADBSCAN_Tester(unittest.TestCase): def setUp(self): @@ -271,7 +272,13 @@ def test_get_cluster_boundary(self): # # Single Core # # ------------------------# polys = adbscan.get_cluster_boundary(self.labels, self.db, xy=["x", "y"]) - wkt = "POLYGON ((0.7217553174317995 0.8192869956700687, 0.7605307121989587 0.9086488808086682, 0.9177741225129434 0.8568503024577332, 0.8126209616521135 0.6262871483113925, 0.6125260668293881 0.5475861559192435, 0.5425443680112613 0.7546476915298572, 0.7217553174317995 0.8192869956700687))" + wkt = ( + "POLYGON ((0.7217553174317995 0.8192869956700687, 0.7605307121989587 " + "0.9086488808086682, 0.9177741225129434 0.8568503024577332, " + "0.8126209616521135 0.6262871483113925, 0.6125260668293881 " + "0.5475861559192435, 0.5425443680112613 0.7546476915298572, " + "0.7217553174317995 0.8192869956700687))" + ) self.assertEqual(polys[0].wkt, wkt) # ------------------------# @@ -280,7 +287,13 @@ def test_get_cluster_boundary(self): polys = adbscan.get_cluster_boundary( self.labels, self.db, xy=["x", "y"], n_jobs=-1 ) - wkt = "POLYGON ((0.7217553174317995 0.8192869956700687, 0.7605307121989587 0.9086488808086682, 0.9177741225129434 0.8568503024577332, 0.8126209616521135 0.6262871483113925, 0.6125260668293881 0.5475861559192435, 0.5425443680112613 0.7546476915298572, 0.7217553174317995 0.8192869956700687))" + wkt = ( + "POLYGON ((0.7217553174317995 0.8192869956700687, 0.7605307121989587 " + "0.9086488808086682, 0.9177741225129434 0.8568503024577332, " + "0.8126209616521135 0.6262871483113925, 0.6125260668293881 " + "0.5475861559192435, 0.5425443680112613 0.7546476915298572, " + "0.7217553174317995 0.8192869956700687))" + ) self.assertEqual(polys[0].wkt, wkt) diff --git a/esda/tests/test_gamma.py b/esda/tests/test_gamma.py index 1e84091b..5d9870ce 100644 --- a/esda/tests/test_gamma.py +++ b/esda/tests/test_gamma.py @@ -1,12 +1,17 @@ import unittest + import numpy as np +from libpysal.common import pandas from libpysal.weights.util import lat2W + from ..gamma import Gamma -from libpysal.common import pandas PANDAS_EXTINCT = pandas is None + + class Gamma_Tester(unittest.TestCase): """Unit test for Gamma Index""" + def setUp(self): self.w = lat2W(4, 4) self.y = np.ones(16) @@ -24,7 +29,7 @@ def test_Gamma(self): self.assertAlmostEqual(g.max_g, 20.0) self.assertAlmostEqual(g.mean_g, 11.093093093093094) np.random.seed(12345) - g1 = Gamma(self.y, self.w, operation='s') + g1 = Gamma(self.y, self.w, operation="s") self.assertAlmostEqual(g1.g, 8.0) self.assertAlmostEqual(g1.g_z, -3.7057554345954791) self.assertAlmostEqual(g1.p_sim_g, 0.001) @@ -32,7 +37,7 @@ def test_Gamma(self): self.assertAlmostEqual(g1.max_g, 48.0) self.assertAlmostEqual(g1.mean_g, 25.623623623623622) np.random.seed(12345) - g2 = Gamma(self.y, self.w, operation='a') + g2 = Gamma(self.y, self.w, operation="a") self.assertAlmostEqual(g2.g, 8.0) self.assertAlmostEqual(g2.g_z, -3.7057554345954791) self.assertAlmostEqual(g2.p_sim_g, 0.001) @@ -58,22 +63,23 @@ def func(z, i, j): self.assertAlmostEqual(g4.g_z, 3.1879280354548638) self.assertAlmostEqual(g4.p_sim_g, 0.0030000000000000001) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_by_col(self): import pandas as pd - df = pd.DataFrame(self.y, columns=['y']) - r1 = Gamma.by_col(df, ['y'], w=self.w) - self.assertIn('y_gamma', r1.columns) - self.assertIn('y_p_sim', r1.columns) + + df = pd.DataFrame(self.y, columns=["y"]) + r1 = Gamma.by_col(df, ["y"], w=self.w) + 
self.assertIn("y_gamma", r1.columns) + self.assertIn("y_p_sim", r1.columns) this_gamma = np.unique(r1.y_gamma.values) this_pval = np.unique(r1.y_p_sim.values) np.testing.assert_allclose(this_gamma, self.g.g) np.testing.assert_allclose(this_pval, self.g.p_sim) - Gamma.by_col(df, ['y'], inplace=True, operation='s', w=self.w) + Gamma.by_col(df, ["y"], inplace=True, operation="s", w=self.w) this_gamma = np.unique(df.y_gamma.values) this_pval = np.unique(df.y_p_sim.values) np.testing.assert_allclose(this_gamma, 8.0) - np.testing.assert_allclose(this_pval, .001) + np.testing.assert_allclose(this_pval, 0.001) suite = unittest.TestSuite() @@ -82,6 +88,6 @@ def test_by_col(self): a = unittest.TestLoader().loadTestsFromTestCase(i) suite.addTest(a) -if __name__ == '__main__': +if __name__ == "__main__": runner = unittest.TextTestRunner() runner.run(suite) diff --git a/esda/tests/test_geary.py b/esda/tests/test_geary.py index c421f081..18c467b2 100644 --- a/esda/tests/test_geary.py +++ b/esda/tests/test_geary.py @@ -1,14 +1,13 @@ """Geary Unittest.""" import unittest -from libpysal.io import open as popen +import numpy as np from libpysal import examples from libpysal.common import pandas -from libpysal.weights import W +from libpysal.io import open as popen from libpysal.weights import Rook from .. import geary -import numpy as np PANDAS_EXTINCT = pandas is None diff --git a/esda/tests/test_getisord.py b/esda/tests/test_getisord.py index 35ed3a42..35547215 100644 --- a/esda/tests/test_getisord.py +++ b/esda/tests/test_getisord.py @@ -1,9 +1,10 @@ import unittest + import numpy as np +from libpysal.common import ATOL, RTOL, pandas +from libpysal.weights.distance import DistanceBand from .. import getisord -from libpysal.weights.distance import DistanceBand -from libpysal.common import pandas, RTOL, ATOL POINTS = np.array([(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)]) W = DistanceBand(POINTS, threshold=15) diff --git a/esda/tests/test_join_counts.py b/esda/tests/test_join_counts.py index 4bdbbd51..b3fda32d 100644 --- a/esda/tests/test_join_counts.py +++ b/esda/tests/test_join_counts.py @@ -1,14 +1,17 @@ import unittest + import numpy as np +from libpysal.common import pandas +from libpysal.weights.util import lat2W from ..join_counts import Join_Counts -from libpysal.weights.util import lat2W -from libpysal.common import pandas PANDAS_EXTINCT = pandas is None + class Join_Counts_Tester(unittest.TestCase): """Unit test for Join Counts""" + def setUp(self): self.w = lat2W(4, 4) self.y = np.ones(16) @@ -41,12 +44,15 @@ def test_Join_Counts(self): self.assertAlmostEqual(0.001, jc.p_sim_autocorr_pos) self.assertAlmostEqual(0.2653504320039377, jc.sim_autocorr_chi2) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_by_col(self): import pandas as pd - df = pd.DataFrame(self.y, columns=['y']) + + df = pd.DataFrame(self.y, columns=["y"]) np.random.seed(12345) - r1 = Join_Counts.by_col(df, ['y'], w=self.w, permutations=999) # outvals = ['bb', 'bw', 'ww', 'p_sim_bw', 'p_sim_bb'] + r1 = Join_Counts.by_col( + df, ["y"], w=self.w, permutations=999 + ) # outvals = ['bb', 'bw', 'ww', 'p_sim_bw', 'p_sim_bb'] bb = np.unique(r1.y_bb.values) bw = np.unique(r1.y_bw.values) @@ -59,12 +65,13 @@ def test_by_col(self): self.assertAlmostEqual(bb_p, c.p_sim_bb) self.assertAlmostEqual(bw_p, c.p_sim_bw) + suite = unittest.TestSuite() test_classes = [Join_Counts_Tester] for i in test_classes: a = unittest.TestLoader().loadTestsFromTestCase(i) 
suite.addTest(a) -if __name__ == '__main__': +if __name__ == "__main__": runner = unittest.TextTestRunner() runner.run(suite) diff --git a/esda/tests/test_ljc.py b/esda/tests/test_ljc.py index 217c13e1..14235421 100644 --- a/esda/tests/test_ljc.py +++ b/esda/tests/test_ljc.py @@ -1,31 +1,32 @@ # based off: https://github.com/pysal/esda/blob/master/esda/tests/test_join_counts.py import unittest + import numpy as np -from libpysal.weights.util import lat2W from libpysal.common import pandas +from libpysal.weights.util import lat2W from esda.join_counts_local import Join_Counts_Local PANDAS_EXTINCT = pandas is None + class Join_Counts_Locals_Tester(unittest.TestCase): """Unit test for Local Join Counts (univariate)""" + def setUp(self): self.w = lat2W(4, 4) self.y = np.ones(16) self.y[0:8] = 0 def test_Join_Counts_Locals(self): - """Test method""" - np.random.seed(12345) - ljc = Join_Counts_Local(connectivity=self.w).fit(self.y) - assert np.array_equal(ljc.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 3, 2, 2, 3, 3, 2]) - - + """Test method""" + np.random.seed(12345) + ljc = Join_Counts_Local(connectivity=self.w).fit(self.y) + assert np.array_equal(ljc.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 3, 2, 2, 3, 3, 2]) + + suite = unittest.TestSuite() -test_classes = [ - Join_Counts_Locals_Tester -] +test_classes = [Join_Counts_Locals_Tester] for i in test_classes: a = unittest.TestLoader().loadTestsFromTestCase(i) suite.addTest(a) diff --git a/esda/tests/test_ljc_bv.py b/esda/tests/test_ljc_bv.py index 8e9af20a..4caa8f8d 100644 --- a/esda/tests/test_ljc_bv.py +++ b/esda/tests/test_ljc_bv.py @@ -1,34 +1,43 @@ # based off: https://github.com/pysal/esda/blob/master/tests/test_join_counts.py import unittest + import numpy as np -from libpysal.weights.util import lat2W from libpysal.common import pandas +from libpysal.weights.util import lat2W from esda.join_counts_local_bv import Join_Counts_Local_BV PANDAS_EXTINCT = pandas is None + class Local_Join_Counts_BV_Tester(unittest.TestCase): """Unit test for Local Join Counts (univariate)""" + def setUp(self): self.w = lat2W(4, 4) self.x = np.ones(16) self.x[0:8] = 0 - self.z = [0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1] + self.z = [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1] def test_Local_Join_Counts_BV(self): - """Test method""" - np.random.seed(12345) - ljc_bv_case1 = Join_Counts_Local_BV(connectivity=self.w).fit(self.x, self.z, case="BJC") - ljc_bv_case2 = Join_Counts_Local_BV(connectivity=self.w).fit(self.x, self.z, case="CLC") - assert np.array_equal(ljc_bv_case1.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0]) - assert np.array_equal(ljc_bv_case2.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 2]) - + """Test method""" + np.random.seed(12345) + ljc_bv_case1 = Join_Counts_Local_BV(connectivity=self.w).fit( + self.x, self.z, case="BJC" + ) + ljc_bv_case2 = Join_Counts_Local_BV(connectivity=self.w).fit( + self.x, self.z, case="CLC" + ) + assert np.array_equal( + ljc_bv_case1.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0] + ) + assert np.array_equal( + ljc_bv_case2.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 2] + ) + suite = unittest.TestSuite() -test_classes = [ - Local_Join_Counts_BV_Tester -] +test_classes = [Local_Join_Counts_BV_Tester] for i in test_classes: a = unittest.TestLoader().loadTestsFromTestCase(i) suite.addTest(a) diff --git a/esda/tests/test_ljc_mv.py b/esda/tests/test_ljc_mv.py index 7a67b7a1..49095961 100644 --- a/esda/tests/test_ljc_mv.py +++ b/esda/tests/test_ljc_mv.py @@ -1,33 +1,36 @@ # based off: 
https://github.com/pysal/esda/blob/master/tests/test_join_counts.py import unittest + import numpy as np -from libpysal.weights.util import lat2W from libpysal.common import pandas +from libpysal.weights.util import lat2W from esda.join_counts_local_mv import Join_Counts_Local_MV PANDAS_EXTINCT = pandas is None + class Local_Join_Counts_MV_Tester(unittest.TestCase): """Unit test for Local Join Counts (univariate)""" + def setUp(self): self.w = lat2W(4, 4) self.x = np.ones(16) self.x[0:8] = 0 - self.y = [0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1] - self.z = [0,1,1,1,1,1,1,1,0,0,0,1,0,0,1,1] - + self.y = [0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1] + self.z = [0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1] def test_Local_Join_Counts_MV(self): - """Test method""" - np.random.seed(12345) - ljc_mv = Join_Counts_Local_MV(connectivity=self.w).fit([self.x, self.y, self.z]) - assert np.array_equal(ljc_mv.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 2]) - + """Test method""" + np.random.seed(12345) + ljc_mv = Join_Counts_Local_MV(connectivity=self.w).fit([self.x, self.y, self.z]) + assert np.array_equal( + ljc_mv.LJC, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 2] + ) + + suite = unittest.TestSuite() -test_classes = [ - Local_Join_Counts_MV_Tester -] +test_classes = [Local_Join_Counts_MV_Tester] for i in test_classes: a = unittest.TestLoader().loadTestsFromTestCase(i) suite.addTest(a) diff --git a/esda/tests/test_local_geary.py b/esda/tests/test_local_geary.py index 42197f90..3cb97c10 100644 --- a/esda/tests/test_local_geary.py +++ b/esda/tests/test_local_geary.py @@ -1,27 +1,26 @@ import unittest + import libpysal -from libpysal.common import pandas, RTOL, ATOL -from esda.geary_local import Geary_Local import numpy as np -PANDAS_EXTINCT = pandas is None +from esda.geary_local import Geary_Local + class Geary_Local_Tester(unittest.TestCase): def setUp(self): np.random.seed(10) self.w = libpysal.io.open(libpysal.examples.get_path("stl.gal")).read() f = libpysal.io.open(libpysal.examples.get_path("stl_hom.txt")) - self.y = np.array(f.by_col['HR8893']) + self.y = np.array(f.by_col["HR8893"]) def test_local_geary(self): lG = Geary_Local(connectivity=self.w).fit(self.y) self.assertAlmostEqual(lG.localG[0], 0.696703432) self.assertAlmostEqual(lG.p_sim[0], 0.19) - + + suite = unittest.TestSuite() -test_classes = [ - Geary_Local_Tester -] +test_classes = [Geary_Local_Tester] for i in test_classes: a = unittest.TestLoader().loadTestsFromTestCase(i) suite.addTest(a) @@ -29,4 +28,3 @@ def test_local_geary(self): if __name__ == "__main__": runner = unittest.TextTestRunner() runner.run(suite) - diff --git a/esda/tests/test_local_geary_mv.py b/esda/tests/test_local_geary_mv.py index aec3dc00..21bb40a2 100644 --- a/esda/tests/test_local_geary_mv.py +++ b/esda/tests/test_local_geary_mv.py @@ -1,29 +1,28 @@ import unittest + import libpysal -from libpysal.common import pandas, RTOL, ATOL -from esda.geary_local_mv import Geary_Local_MV import numpy as np -PANDAS_EXTINCT = pandas is None +from esda.geary_local_mv import Geary_Local_MV + class Geary_Local_MV_Tester(unittest.TestCase): def setUp(self): np.random.seed(100) self.w = libpysal.io.open(libpysal.examples.get_path("stl.gal")).read() f = libpysal.io.open(libpysal.examples.get_path("stl_hom.txt")) - self.y1 = np.array(f.by_col['HR8893']) - self.y2 = np.array(f.by_col['HC8488']) + self.y1 = np.array(f.by_col["HR8893"]) + self.y2 = np.array(f.by_col["HC8488"]) def test_local_geary_mv(self): lG_mv = Geary_Local_MV(connectivity=self.w).fit([self.y1, self.y2]) 
print(lG_mv.p_sim[0]) self.assertAlmostEqual(lG_mv.localG[0], 0.4096931479581422) self.assertAlmostEqual(lG_mv.p_sim[0], 0.211) - + + suite = unittest.TestSuite() -test_classes = [ - Geary_Local_MV_Tester -] +test_classes = [Geary_Local_MV_Tester] for i in test_classes: a = unittest.TestLoader().loadTestsFromTestCase(i) suite.addTest(a) diff --git a/esda/tests/test_losh.py b/esda/tests/test_losh.py index 6cad83db..8ddc8606 100644 --- a/esda/tests/test_losh.py +++ b/esda/tests/test_losh.py @@ -1,28 +1,27 @@ # based off: https://github.com/pysal/esda/blob/master/tests/test_moran.py#L96 import unittest + import libpysal -from libpysal.common import pandas, RTOL, ATOL -from esda.losh import LOSH import numpy as np -PANDAS_EXTINCT = pandas is None +from esda.losh import LOSH + class Losh_Tester(unittest.TestCase): def setUp(self): np.random.seed(10) self.w = libpysal.io.open(libpysal.examples.get_path("stl.gal")).read() f = libpysal.io.open(libpysal.examples.get_path("stl_hom.txt")) - self.y = np.array(f.by_col['HR8893']) + self.y = np.array(f.by_col["HR8893"]) def test_losh(self): ls = LOSH(connectivity=self.w, inference="chi-square").fit(self.y) self.assertAlmostEqual(ls.Hi[0], 0.77613471) self.assertAlmostEqual(ls.pval[0], 0.22802201) - + + suite = unittest.TestSuite() -test_classes = [ - Losh_Tester -] +test_classes = [Losh_Tester] for i in test_classes: a = unittest.TestLoader().loadTestsFromTestCase(i) suite.addTest(a) @@ -30,4 +29,3 @@ def test_losh(self): if __name__ == "__main__": runner = unittest.TextTestRunner() runner.run(suite) - diff --git a/esda/tests/test_map_comparison.py b/esda/tests/test_map_comparison.py index afaeaa29..7964578e 100644 --- a/esda/tests/test_map_comparison.py +++ b/esda/tests/test_map_comparison.py @@ -1,4 +1,9 @@ -import numpy, pytest, geopandas, os +import os + +import geopandas +import numpy +import pytest + from .. import map_comparison as mc pygeos = pytest.importorskip("pygeos") @@ -63,6 +68,10 @@ def test_external_entropy(): numpy.testing.assert_allclose(v1, v2) numpy.testing.assert_allclose(v1, 0.3612313462) v1_ctipped = mc.external_entropy(r1, r2, base=base, balance=100) - numpy.testing.assert_allclose(v1_ctipped, 0.42275150844) #value from completeness + numpy.testing.assert_allclose( + v1_ctipped, 0.42275150844 + ) # value from completeness v1_htipped = mc.external_entropy(r1, r2, base=base, balance=-100) - numpy.testing.assert_allclose(v1_htipped, 0.31534179219) #value from homogeneity + numpy.testing.assert_allclose( + v1_htipped, 0.31534179219 + ) # value from homogeneity diff --git a/esda/tests/test_mixture_smoothing.py b/esda/tests/test_mixture_smoothing.py index e9806315..cd662342 100644 --- a/esda/tests/test_mixture_smoothing.py +++ b/esda/tests/test_mixture_smoothing.py @@ -1,11 +1,13 @@ import unittest + import numpy as np -import libpysal + from .. 
import mixture_smoothing as m_s class MS_Tester(unittest.TestCase): """Mixture_Smoothing Unit Tests""" + def setUp(self): self.e = np.array([10, 5, 12, 20]) self.b = np.array([100, 150, 80, 200]) @@ -13,29 +15,28 @@ def setUp(self): def test_NP_Mixture_Smoother(self): """Test the main class""" mix = m_s.NP_Mixture_Smoother(self.e, self.b) - np.testing.assert_array_almost_equal(mix.r, np.array( - [0.10982278, 0.03445531, 0.11018404, 0.11018604])) np.testing.assert_array_almost_equal( - mix.category, np.array([1, 0, 1, 1])) - #self.failUnless(mix.getSeed(), (np.array([ 0.5, 0.5]), np.array([ 0.03333333, + mix.r, np.array([0.10982278, 0.03445531, 0.11018404, 0.11018604]) + ) + np.testing.assert_array_almost_equal(mix.category, np.array([1, 0, 1, 1])) + # self.failUnless(mix.getSeed(), (np.array([ 0.5, 0.5]), np.array([ 0.03333333, # 0.15 ]))) left, right = mix.getSeed() np.testing.assert_array_almost_equal(left, np.array([0.5, 0.5])) - np.testing.assert_array_almost_equal( - right, np.array([0.03333333, 0.15])) + np.testing.assert_array_almost_equal(right, np.array([0.03333333, 0.15])) d = mix.mixalg() np.testing.assert_array_almost_equal( - d['mix_den'], np.array([0., 0., 0., 0.])) - np.testing.assert_array_almost_equal(d['gradient'], np.array([0.])) - np.testing.assert_array_almost_equal(d['p'], np.array([1.])) - np.testing.assert_array_almost_equal( - d['grid'], np.array([11.27659574])) - self.assertEqual(d['k'], 1) - self.assertEqual(d['accuracy'], 1.0) + d["mix_den"], np.array([0.0, 0.0, 0.0, 0.0]) + ) + np.testing.assert_array_almost_equal(d["gradient"], np.array([0.0])) + np.testing.assert_array_almost_equal(d["p"], np.array([1.0])) + np.testing.assert_array_almost_equal(d["grid"], np.array([11.27659574])) + self.assertEqual(d["k"], 1) + self.assertEqual(d["accuracy"], 1.0) left, right = mix.getRateEstimates() np.testing.assert_array_almost_equal( - left, np.array([0.0911574, 0.0911574, - 0.0911574, 0.0911574])) + left, np.array([0.0911574, 0.0911574, 0.0911574, 0.0911574]) + ) np.testing.assert_array_almost_equal(right, np.array([1, 1, 1, 1])) diff --git a/esda/tests/test_moran.py b/esda/tests/test_moran.py index 65319885..9c347539 100644 --- a/esda/tests/test_moran.py +++ b/esda/tests/test_moran.py @@ -1,11 +1,11 @@ import unittest + import libpysal -from libpysal.common import pandas, RTOL, ATOL -from .. import moran import numpy as np +from libpysal.common import ATOL, RTOL +from .. 
import moran -PANDAS_EXTINCT = pandas is None SEED = 12345 @@ -131,7 +131,6 @@ def test_Moran_Local_parallel(self): self.assertAlmostEqual(lm.z_sim[0], -0.6990291160835514) self.assertAlmostEqual(lm.p_z_sim[0], 0.24226691753791396) - @unittest.skip("This function is being deprecated in the next release.") def test_by_col(self): import pandas as pd @@ -158,28 +157,58 @@ def test_local_moments(self): permutations=0, seed=SEED, ) - + wikh_fast = moran._wikh_fast(lm.w.sparse) wikh_slow = moran._wikh_slow(lm.w.sparse) - wikh_fast_c = moran._wikh_fast(lm.w.sparse, sokal_correction=True) - wikh_slow_c = moran._wikh_slow(lm.w.sparse, sokal_correction=True) + # wikh_fast_c = moran._wikh_fast(lm.w.sparse, sokal_correction=True) + # wikh_slow_c = moran._wikh_slow(lm.w.sparse, sokal_correction=True) np.testing.assert_allclose(wikh_fast, wikh_slow, rtol=RTOL, atol=ATOL) np.testing.assert_allclose(wikh_fast, wikh_slow, rtol=RTOL, atol=ATOL) - EIc = np.array([-0.00838113, -0.0243949 , -0.07031778, - -0.21520869, -0.16547163, -0.00178435, - -0.11531888, -0.36138555, -0.05471258, -0.09413562]) - VIc = np.array([0.03636013, 0.10412408, 0.28600769, - 0.26389674, 0.21576683, 0.00779261, - 0.44633942, 0.57696508, 0.12929777, 0.3730742 ]) - - EI = -np.ones((10,))/9 - VI = np.array([0.47374172, 0.47356458, 0.47209663, - 0.15866023, 0.15972526, 0.47376436, - 0.46927721, 0.24584217, 0.26498308, 0.47077467]) - - + EIc = np.array( + [ + -0.00838113, + -0.0243949, + -0.07031778, + -0.21520869, + -0.16547163, + -0.00178435, + -0.11531888, + -0.36138555, + -0.05471258, + -0.09413562, + ] + ) + VIc = np.array( + [ + 0.03636013, + 0.10412408, + 0.28600769, + 0.26389674, + 0.21576683, + 0.00779261, + 0.44633942, + 0.57696508, + 0.12929777, + 0.3730742, + ] + ) + EI = -np.ones((10,)) / 9 + VI = np.array( + [ + 0.47374172, + 0.47356458, + 0.47209663, + 0.15866023, + 0.15972526, + 0.47376436, + 0.46927721, + 0.24584217, + 0.26498308, + 0.47077467, + ] + ) np.testing.assert_allclose(lm.EIc, EIc, rtol=RTOL, atol=ATOL) np.testing.assert_allclose(lm.VIc, VIc, rtol=RTOL, atol=ATOL) diff --git a/esda/tests/test_shape.py b/esda/tests/test_shape.py index eb3d5bbf..34feb00c 100644 --- a/esda/tests/test_shape.py +++ b/esda/tests/test_shape.py @@ -1,12 +1,12 @@ -from shapely import geometry -from numpy import testing, array import pytest +from numpy import array, testing +from shapely import geometry + +import esda pygeos = pytest.importorskip("pygeos") pytest.importorskip("numba") -from esda.shape import * - shape = array( [ @@ -31,94 +31,93 @@ def test_boundary_amplitude(): - observed = boundary_amplitude(shape) + observed = esda.shape.boundary_amplitude(shape) testing.assert_allclose(observed, 0.844527, atol=ATOL) def test_convex_hull_ratio(): - observed = convex_hull_ratio(shape) + observed = esda.shape.convex_hull_ratio(shape) testing.assert_allclose(observed, 0.7, atol=ATOL) def test_length_width_diff(): - observed = length_width_diff(shape) + observed = esda.shape.length_width_diff(shape) testing.assert_allclose(observed, 0.25, atol=ATOL) def test_radii_ratio(): - observed = radii_ratio(shape) + observed = esda.shape.radii_ratio(shape) testing.assert_allclose(observed, 0.659366, atol=ATOL) def test_diameter_ratio(): - observed = diameter_ratio(shape) + observed = esda.shape.diameter_ratio(shape) testing.assert_allclose(observed, 0.8, atol=ATOL) def test_iaq(): - observed = isoareal_quotient(shape) + observed = esda.shape.isoareal_quotient(shape) testing.assert_allclose(observed, 0.622314, atol=ATOL) def test_ipq(): - observed = 
isoperimetric_quotient(shape) + observed = esda.shape.isoperimetric_quotient(shape) testing.assert_allclose(observed, 0.387275, atol=ATOL) def test_moa(): - observed = moa_ratio(shape) + observed = esda.shape.moa_ratio(shape) testing.assert_allclose(observed, 3.249799, atol=ATOL) def test_moment_of_interia(): - observed = moment_of_inertia(shape) + observed = esda.shape.moment_of_inertia(shape) testing.assert_allclose(observed, 0.315715, atol=ATOL) def test_nmi(): - observed = nmi(shape) + observed = esda.shape.nmi(shape) testing.assert_allclose(observed, 0.487412, atol=ATOL) def test_mbc(): - observed = minimum_bounding_circle_ratio(shape) + observed = esda.shape.minimum_bounding_circle_ratio(shape) testing.assert_allclose(observed, 0.437571, atol=ATOL) def test_reflexive_angle_ratio(): - observed = reflexive_angle_ratio(shape) + observed = esda.shape.reflexive_angle_ratio(shape) testing.assert_allclose(observed, 3 / 8, atol=ATOL) def test_fractal_dimension(): r = [ - fractal_dimension(shape, support=support)[0] + esda.shape.fractal_dimension(shape, support=support)[0] for support in ("hex", "square", "circle") ] - testing.assert_allclose(r, [0.218144, -4.29504, 0.257882], atol=ATOL) def test_squareness(): - observed = squareness(shape) + observed = esda.shape.squareness(shape) testing.assert_allclose(observed, 0.493094, atol=ATOL) def test_rectangularity(): - observed = rectangularity(shape) + observed = esda.shape.rectangularity(shape) testing.assert_allclose(observed, 0.7, atol=ATOL) def test_shape_index(): - observed = shape_index(shape) + observed = esda.shape.shape_index(shape) testing.assert_allclose(observed, 0.659366, atol=ATOL) def test_equivalent_rectangular_index(): - observed = equivalent_rectangular_index(shape) + observed = esda.shape.equivalent_rectangular_index(shape) testing.assert_allclose(observed, 0.706581, atol=ATOL) def test_form_factor(): - observed = form_factor(shape, array([2])) + observed = esda.shape.form_factor(shape, array([2])) testing.assert_allclose(observed, 0.602535, atol=ATOL) diff --git a/esda/tests/test_silhouette.py b/esda/tests/test_silhouette.py index b0f9945f..56b13e77 100644 --- a/esda/tests/test_silhouette.py +++ b/esda/tests/test_silhouette.py @@ -1,124 +1,191 @@ -from .. import silhouettes -import geopandas +import unittest + import libpysal import numpy -import unittest from sklearn.metrics import pairwise +from .. import silhouettes + RTOL = libpysal.common.RTOL ATOL = libpysal.common.ATOL + class Silhouette_Tester(unittest.TestCase): def setUp(self): - self.w = libpysal.weights.lat2W(3,3, rook=False) + self.w = libpysal.weights.lat2W(3, 3, rook=False) numpy.random.seed(12345) - self.X = numpy.random.random((9,3)) - self.groups = numpy.array([[0,0,0], - [0,1,1], - [0,1,1]]).flatten() - self.precomputed = self.X @ self.X.T + self.X = numpy.random.random((9, 3)) + self.groups = numpy.array([[0, 0, 0], [0, 1, 1], [0, 1, 1]]).flatten() + self.precomputed = self.X @ self.X.T self.altmetric = pairwise.manhattan_distances def test_boundary(self): - known = numpy.array([ 0.03042804, - -0.26071434, - 0.26664155, - -0.09395997, - 0.02754655, - -0.30581229, - 0.2697089 , - -0.29993007, - 0. 
]) + known = numpy.array( + [ + 0.03042804, + -0.26071434, + 0.26664155, + -0.09395997, + 0.02754655, + -0.30581229, + 0.2697089, + -0.29993007, + 0.0, + ] + ) test = silhouettes.boundary_silhouette(self.X, self.groups, self.w) numpy.testing.assert_allclose(known, test, rtol=RTOL, atol=ATOL) - known = numpy.array([ 0.04246261, - -0.17059996, - 0.35656939, - -0.10765454, - 0.00831288, - -0.30924291, - 0.36502035, - -0.25464692, - 0. ]) - test = silhouettes.boundary_silhouette(self.X, self.groups, self.w, - metric=self.altmetric) + known = numpy.array( + [ + 0.04246261, + -0.17059996, + 0.35656939, + -0.10765454, + 0.00831288, + -0.30924291, + 0.36502035, + -0.25464692, + 0.0, + ] + ) + test = silhouettes.boundary_silhouette( + self.X, self.groups, self.w, metric=self.altmetric + ) numpy.testing.assert_allclose(known, test, rtol=RTOL, atol=ATOL) - known = numpy.array([-0.43402484, - -0.29488836, - -0.3663925 , - -0.32831205, - -0.21536904, - -0.10134236, - -0.37144783, - -0.01618446, - 0. ]) - test = silhouettes.boundary_silhouette(self.X, self.groups, self.w, - metric=self.precomputed) + known = numpy.array( + [ + -0.43402484, + -0.29488836, + -0.3663925, + -0.32831205, + -0.21536904, + -0.10134236, + -0.37144783, + -0.01618446, + 0.0, + ] + ) + test = silhouettes.boundary_silhouette( + self.X, self.groups, self.w, metric=self.precomputed + ) numpy.testing.assert_allclose(known, test, rtol=RTOL, atol=ATOL) with self.assertRaises(AssertionError): - silhouettes.boundary_silhouette(self.X, self.groups, self.w, - metric=self.precomputed - self.precomputed.mean()) + silhouettes.boundary_silhouette( + self.X, + self.groups, + self.w, + metric=self.precomputed - self.precomputed.mean(), + ) def test_path(self): - known = numpy.array([ 0.15982274, - -0.02136909, - -0.3972349 , - 0.24479121, - 0.02754655, - 0.28465546, - -0.07572727, - 0.26903733, - 0.4165144 ]) + known = numpy.array( + [ + 0.15982274, + -0.02136909, + -0.3972349, + 0.24479121, + 0.02754655, + 0.28465546, + -0.07572727, + 0.26903733, + 0.4165144, + ] + ) test = silhouettes.path_silhouette(self.X, self.groups, self.w) numpy.testing.assert_allclose(known, test, rtol=RTOL, atol=ATOL) - known = numpy.array([ 0.1520476 , - 0.0390323 , - -0.34269345, - 0.27239358, - 0.00831288, - 0.27934432, - -0.03874118, - 0.28623703, - 0.40062121]) - test = silhouettes.path_silhouette(self.X, self.groups, self.w, - metric=self.altmetric) + known = numpy.array( + [ + 0.1520476, + 0.0390323, + -0.34269345, + 0.27239358, + 0.00831288, + 0.27934432, + -0.03874118, + 0.28623703, + 0.40062121, + ] + ) + test = silhouettes.path_silhouette( + self.X, self.groups, self.w, metric=self.altmetric + ) numpy.testing.assert_allclose(known, test, rtol=RTOL, atol=ATOL) with self.assertRaises(TypeError): - silhouettes.path_silhouette(self.X, self.groups, self.w, - metric=self.precomputed) + silhouettes.path_silhouette( + self.X, self.groups, self.w, metric=self.precomputed + ) with self.assertRaises(AssertionError): - silhouettes.path_silhouette(self.X, self.groups, self.w, - metric=lambda d: -self.altmetric(d)) + silhouettes.path_silhouette( + self.X, self.groups, self.w, metric=lambda d: -self.altmetric(d) + ) def test_nearest_label(self): - known = numpy.array([1,1,1,1,0,0,1,0,0]) - test,d = silhouettes.nearest_label(self.X, self.groups, return_distance=True) + known = numpy.array([1, 1, 1, 1, 0, 0, 1, 0, 0]) + test, d = silhouettes.nearest_label(self.X, self.groups, return_distance=True) numpy.testing.assert_array_equal(known, test) - known = 
numpy.array([0,1,0,0,1,0,0,0,1]) + known = numpy.array([0, 1, 0, 0, 1, 0, 0, 0, 1]) test = silhouettes.nearest_label(self.X, self.groups, keep_self=True) numpy.testing.assert_array_equal(test, known) - knownd = numpy.array([1.05707684, - 0.74780721, - 0.88841079, - 0.71628677, - 1.25964181, - 0.5854757 , - 0.89710073, - 0.64575898, - 0.73913526]) + knownd = numpy.array( + [ + 1.05707684, + 0.74780721, + 0.88841079, + 0.71628677, + 1.25964181, + 0.5854757, + 0.89710073, + 0.64575898, + 0.73913526, + ] + ) numpy.testing.assert_allclose(d, knownd, rtol=RTOL, atol=ATOL) def test_silhouette_alist(self): - known = numpy.array([0., 0., 0.22434244, 0., - 0., 0., -0.07589293, -0.07589293, - 0., 0.41331324, 0.41331324, 0., - 0., 0.11703681, 0., 0.11703681, - 0.27065991, 0.27065991, 0.27065991, - 0.27065991, 0., 0.27065991, - 0., 0., -0.07441639, -0.07441639, - 0., 0., 0., 0., 0.41576712, - 0.41576712, -0.06657343, 0., 0., - -0.06657343, 0., 0., 0., 0.]) + known = numpy.array( + [ + 0.0, + 0.0, + 0.22434244, + 0.0, + 0.0, + 0.0, + -0.07589293, + -0.07589293, + 0.0, + 0.41331324, + 0.41331324, + 0.0, + 0.0, + 0.11703681, + 0.0, + 0.11703681, + 0.27065991, + 0.27065991, + 0.27065991, + 0.27065991, + 0.0, + 0.27065991, + 0.0, + 0.0, + -0.07441639, + -0.07441639, + 0.0, + 0.0, + 0.0, + 0.0, + 0.41576712, + 0.41576712, + -0.06657343, + 0.0, + 0.0, + -0.06657343, + 0.0, + 0.0, + 0.0, + 0.0, + ] + ) test = silhouettes.silhouette_alist(self.X, self.groups, self.w.to_adjlist()) numpy.testing.assert_allclose(known, test.silhouette, rtol=RTOL, atol=ATOL) diff --git a/esda/tests/test_smaup.py b/esda/tests/test_smaup.py index 396397a7..01467fa1 100644 --- a/esda/tests/test_smaup.py +++ b/esda/tests/test_smaup.py @@ -1,63 +1,62 @@ import unittest + import libpysal -from libpysal.common import pandas, RTOL, ATOL -from ..smaup import Smaup -from ..moran import Moran import numpy as np +from libpysal.common import ATOL, RTOL, pandas +from ..moran import Moran +from ..smaup import Smaup PANDAS_EXTINCT = pandas is None + class Smaup_Tester(unittest.TestCase): - def setup(self): - from esda.moran import Moran + def setUp(self): self.w = libpysal.io.open(libpysal.examples.get_path("stl.gal")).read() f = libpysal.io.open(libpysal.examples.get_path("stl_hom.txt")) - self.y = np.array(f.by_col['HR8893']) + self.y = np.array(f.by_col["HR8893"]) self.rho = Moran(self.y, self.w).I self.n = len(self.y) - self.k = int(self.n/2) + self.k = int(self.n / 2) def test_smaup(self): - self.setup() sm = Smaup(self.n, self.k, self.rho) self.assertAlmostEqual(sm.n, 78) self.assertAlmostEqual(sm.k, 39) self.assertAlmostEqual(sm.rho, 0.24365582621771695) - np.testing.assert_allclose(sm.smaup, 0.15221341690376405, rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(sm.smaup, 0.15221341690376405, rtol=RTOL, atol=ATOL) self.assertAlmostEqual(sm.critical_01, 0.38970613333333337) self.assertAlmostEqual(sm.critical_05, 0.3557221333333333) self.assertAlmostEqual(sm.critical_1, 0.3157950666666666) - self.assertEqual(sm.summary, 'Pseudo p-value > 0.10 (H0 is not rejected)') + self.assertEqual(sm.summary, "Pseudo p-value > 0.10 (H0 is not rejected)") def test_sids(self): - from esda import moran w = libpysal.io.open(libpysal.examples.get_path("sids2.gal")).read() f = libpysal.io.open(libpysal.examples.get_path("sids2.dbf")) SIDR = np.array(f.by_col("SIDR74")) - rho = moran.Moran(SIDR, w, two_tailed=False).I + rho = Moran(SIDR, w, two_tailed=False).I n = len(SIDR) - k = int(n/2) + k = int(n / 2) sm = Smaup(n, k, rho) - 
np.testing.assert_allclose(sm.smaup, 0.15176796553181948, rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(sm.smaup, 0.15176796553181948, rtol=RTOL, atol=ATOL) self.assertAlmostEqual(sm.critical_01, 0.23404000000000003) self.assertAlmostEqual(sm.critical_05, 0.21088) self.assertAlmostEqual(sm.critical_1, 0.18239) - self.assertEqual(sm.summary, 'Pseudo p-value > 0.10 (H0 is not rejected)') + self.assertEqual(sm.summary, "Pseudo p-value > 0.10 (H0 is not rejected)") - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_by_col(self): from libpysal.io import geotable as pdio - from esda import moran + np.random.seed(11213) - df = pdio.read_files(libpysal.examples.get_path('sids2.dbf')) + df = pdio.read_files(libpysal.examples.get_path("sids2.dbf")) w = libpysal.io.open(libpysal.examples.get_path("sids2.gal")).read() - k = int(w.n/2) - mi = moran.Moran.by_col(df, ['SIDR74'], w=w, two_tailed=False) + k = int(w.n / 2) + mi = Moran.by_col(df, ["SIDR74"], w=w, two_tailed=False) rho = np.unique(mi.SIDR74_moran.values).item() sm = Smaup(w.n, k, rho) np.testing.assert_allclose(sm.smaup, 0.15176796553181948, atol=ATOL, rtol=RTOL) self.assertAlmostEqual(sm.critical_01, 0.23404000000000003) self.assertAlmostEqual(sm.critical_05, 0.21088) self.assertAlmostEqual(sm.critical_1, 0.18239) - self.assertEqual(sm.summary, 'Pseudo p-value > 0.10 (H0 is not rejected)') + self.assertEqual(sm.summary, "Pseudo p-value > 0.10 (H0 is not rejected)") diff --git a/esda/tests/test_smoothing.py b/esda/tests/test_smoothing.py index df8fdc28..6dbffd9c 100644 --- a/esda/tests/test_smoothing.py +++ b/esda/tests/test_smoothing.py @@ -1,12 +1,15 @@ import unittest + import libpysal +import numpy as np +from libpysal.common import ATOL, RTOL, pandas from libpysal.weights.distance import KNN, Kernel + from .. 
import smoothing as sm -import numpy as np -from libpysal.common import RTOL, ATOL, pandas PANDAS_EXTINCT = pandas is None + class TestFlatten(unittest.TestCase): def setUp(self): self.input = [[1, 2], [3, 3, 4], [5, 6]] @@ -36,7 +39,7 @@ def setUp(self): self.e = np.array([30, 25, 25, 15, 33, 21, 30, 20]) self.b = np.array([1000, 1000, 1100, 900, 1000, 900, 1100, 900]) self.s_e = np.array([100, 45, 120, 100, 50, 30, 200, 80]) - self.s_b = s = np.array([1000, 900, 1000, 900, 1000, 900, 1000, 900]) + self.s_b = np.array([1000, 900, 1000, 900, 1000, 900, 1000, 900]) self.n = 2 def test_crude_age_standardization(self): @@ -45,25 +48,28 @@ def test_crude_age_standardization(self): self.assertEqual(list(crude), list(crude_exp)) def test_direct_age_standardization(self): - direct = np.array(sm.direct_age_standardization( - self.e, self.b, self.s_b, self.n)).round(8) - direct_exp = np.array([[0.02374402, 0.01920491, - 0.02904848], [0.02665072, 0.02177143, 0.03230508]]) + direct = np.array( + sm.direct_age_standardization(self.e, self.b, self.s_b, self.n) + ).round(8) + direct_exp = np.array( + [[0.02374402, 0.01920491, 0.02904848], [0.02665072, 0.02177143, 0.03230508]] + ) self.assertEqual(list(direct.flatten()), list(direct_exp.flatten())) def test_indirect_age_standardization(self): - indirect = np.array(sm.indirect_age_standardization( - self.e, self.b, self.s_e, self.s_b, self.n)).round(8) - indirect_exp = np.array([[0.02372382, 0.01940230, - 0.02900789], [0.02610803, .02154304, 0.03164035]]) - self.assertEqual( - list(indirect.flatten()), list(indirect_exp.flatten())) + indirect = np.array( + sm.indirect_age_standardization(self.e, self.b, self.s_e, self.s_b, self.n) + ).round(8) + indirect_exp = np.array( + [[0.02372382, 0.01940230, 0.02900789], [0.02610803, 0.02154304, 0.03164035]] + ) + self.assertEqual(list(indirect.flatten()), list(indirect_exp.flatten())) class TestSRate(unittest.TestCase): def setUp(self): - sids = libpysal.io.open(libpysal.examples.get_path('sids2.dbf'), 'r') - self.w = libpysal.io.open(libpysal.examples.get_path('sids2.gal'), 'r').read() + sids = libpysal.io.open(libpysal.examples.get_path("sids2.dbf"), "r") + self.w = libpysal.io.open(libpysal.examples.get_path("sids2.gal"), "r").read() self.b, self.e = np.array(sids[:, 8]), np.array(sids[:, 9]) self.er = [0.453433, 0.000000, 0.775871, 0.973810, 3.133190] self.eb = [0.0016973, 0.0017054, 0.0017731, 0.0020129, 0.0035349] @@ -71,99 +77,101 @@ def setUp(self): self.smr = [0.00083622, 0.00109402, 0.00081567, 0.0, 0.0048209] self.smr_w = [0.00127146, 0.00127146, 0.0008433, 0.0, 0.0049889] self.smr2 = [0.00091659, 0.00087641, 0.00091073, 0.0, 0.00467633] - self.s_ebr10 = np.array([4.01485749e-05, 3.62437513e-05, - 4.93034844e-05, 5.09387329e-05, 3.72735210e-05, - 3.69333797e-05, 5.40245456e-05, 2.99806055e-05, - 3.73034109e-05, 3.47270722e-05]).reshape(-1,1) - - stl_ex = libpysal.examples.load_example('stl') - self.stl = libpysal.io.open(stl_ex.get_path('stl_hom.csv'), 'r') + self.s_ebr10 = np.array( + [ + 4.01485749e-05, + 3.62437513e-05, + 4.93034844e-05, + 5.09387329e-05, + 3.72735210e-05, + 3.69333797e-05, + 5.40245456e-05, + 2.99806055e-05, + 3.73034109e-05, + 3.47270722e-05, + ] + ).reshape(-1, 1) + + stl_ex = libpysal.examples.load_example("stl") + self.stl = libpysal.io.open(stl_ex.get_path("stl_hom.csv"), "r") self.stl_e, self.stl_b = np.array(self.stl[:, 10]), np.array(self.stl[:, 13]) - self.stl_w = libpysal.io.open(stl_ex.get_path('stl.gal'), 'r').read() + self.stl_w = 
libpysal.io.open(stl_ex.get_path("stl.gal"), "r").read() if not self.stl_w.id_order_set: self.stl_w.id_order = list(range(1, len(self.stl) + 1)) if not PANDAS_EXTINCT: - self.df = libpysal.io.open(libpysal.examples.get_path('sids2.dbf')).to_df() - self.ename = 'SID74' - self.bname = 'BIR74' - self.stl_df = libpysal.io.open(libpysal.examples.get_path('stl_hom.csv')).to_df() - self.stl_ename = 'HC7984' - self.stl_bname = 'PO7984' + self.df = libpysal.io.open(libpysal.examples.get_path("sids2.dbf")).to_df() + self.ename = "SID74" + self.bname = "BIR74" + self.stl_df = libpysal.io.open( + libpysal.examples.get_path("stl_hom.csv") + ).to_df() + self.stl_ename = "HC7984" + self.stl_bname = "PO7984" def test_Excess_Risk(self): out_er = sm.Excess_Risk(self.e, self.b).r - np.testing.assert_allclose(out_er[:5].flatten(), self.er, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(out_er[:5].flatten(), self.er, rtol=RTOL, atol=ATOL) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Excess_Risk_tabular(self): out_er = sm.Excess_Risk(self.df[self.ename], self.df[self.bname]).r - np.testing.assert_allclose(out_er[:5].flatten(), self.er, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(out_er[:5].flatten(), self.er, rtol=RTOL, atol=ATOL) self.assertIsInstance(out_er, np.ndarray) out_er = sm.Excess_Risk.by_col(self.df, self.ename, self.bname) - outcol = '{}-{}_excess_risk'.format(self.ename, self.bname) + outcol = "{}-{}_excess_risk".format(self.ename, self.bname) outcol = out_er[outcol] - np.testing.assert_allclose(outcol[:5], self.er, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(outcol[:5], self.er, rtol=RTOL, atol=ATOL) self.assertIsInstance(outcol.values, np.ndarray) def test_Empirical_Bayes(self): out_eb = sm.Empirical_Bayes(self.e, self.b).r - np.testing.assert_allclose(out_eb[:5].flatten(), self.eb, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(out_eb[:5].flatten(), self.eb, rtol=RTOL, atol=ATOL) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Empirical_Bayes_tabular(self): out_eb = sm.Empirical_Bayes(self.df[self.ename], self.df[self.bname]).r - np.testing.assert_allclose(out_eb[:5].flatten(), self.eb, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(out_eb[:5].flatten(), self.eb, rtol=RTOL, atol=ATOL) self.assertIsInstance(out_eb, np.ndarray) out_eb = sm.Empirical_Bayes.by_col(self.df, self.ename, self.bname) - outcol = '{}-{}_empirical_bayes'.format(self.ename, self.bname) + outcol = "{}-{}_empirical_bayes".format(self.ename, self.bname) outcol = out_eb[outcol] - np.testing.assert_allclose(outcol[:5], self.eb, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(outcol[:5], self.eb, rtol=RTOL, atol=ATOL) self.assertIsInstance(outcol.values, np.ndarray) def test_Spatial_Empirical_Bayes(self): s_eb = sm.Spatial_Empirical_Bayes(self.stl_e, self.stl_b, self.stl_w) - np.testing.assert_allclose(self.s_ebr10, s_eb.r[:10], - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(self.s_ebr10, s_eb.r[:10], rtol=RTOL, atol=ATOL) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Spatial_Empirical_Bayes_tabular(self): - s_eb = sm.Spatial_Empirical_Bayes(self.stl_df[self.stl_ename], - self.stl_df[self.stl_bname], - self.stl_w).r + s_eb = sm.Spatial_Empirical_Bayes( + self.stl_df[self.stl_ename], self.stl_df[self.stl_bname], self.stl_w + ).r self.assertIsInstance(s_eb, np.ndarray) 
np.testing.assert_allclose(self.s_ebr10, s_eb[:10]) - s_eb = sm.Spatial_Empirical_Bayes.by_col(self.stl_df, - self.stl_ename, - self.stl_bname, - self.stl_w) - outcol = '{}-{}_spatial_empirical_bayes'.format(self.stl_ename, self.stl_bname) + s_eb = sm.Spatial_Empirical_Bayes.by_col( + self.stl_df, self.stl_ename, self.stl_bname, self.stl_w + ) + outcol = "{}-{}_spatial_empirical_bayes".format(self.stl_ename, self.stl_bname) r = s_eb[outcol].values self.assertIsInstance(r, np.ndarray) - np.testing.assert_allclose(self.s_ebr10, r[:10].reshape(-1,1)) + np.testing.assert_allclose(self.s_ebr10, r[:10].reshape(-1, 1)) def test_Spatial_Rate(self): out_sr = sm.Spatial_Rate(self.e, self.b, self.w).r - np.testing.assert_allclose(out_sr[:5].flatten(), self.sr, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(out_sr[:5].flatten(), self.sr, rtol=RTOL, atol=ATOL) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Spatial_Rate_tabular(self): out_sr = sm.Spatial_Rate(self.df[self.ename], self.df[self.bname], self.w).r - np.testing.assert_allclose(out_sr[:5].flatten(), self.sr, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose(out_sr[:5].flatten(), self.sr, rtol=RTOL, atol=ATOL) self.assertIsInstance(out_sr, np.ndarray) - out_sr = sm.Spatial_Rate.by_col(self.df, self.ename, self.bname,w=self.w) - outcol = '{}-{}_spatial_rate'.format(self.ename, self.bname) + out_sr = sm.Spatial_Rate.by_col(self.df, self.ename, self.bname, w=self.w) + outcol = "{}-{}_spatial_rate".format(self.ename, self.bname) r = out_sr[outcol].values self.assertIsInstance(r, np.ndarray) np.testing.assert_allclose(r[:5], self.sr, rtol=RTOL, atol=ATOL) @@ -172,129 +180,139 @@ def test_Spatial_Median_Rate(self): out_smr = sm.Spatial_Median_Rate(self.e, self.b, self.w).r out_smr_w = sm.Spatial_Median_Rate(self.e, self.b, self.w, aw=self.b).r out_smr2 = sm.Spatial_Median_Rate(self.e, self.b, self.w, iteration=2).r - np.testing.assert_allclose(out_smr[:5].flatten(), self.smr, - atol=ATOL, rtol=RTOL) - np.testing.assert_allclose(out_smr_w[:5].flatten(), self.smr_w, - atol=ATOL, rtol=RTOL) - np.testing.assert_allclose(out_smr2[:5].flatten(), self.smr2, - atol=ATOL, rtol=RTOL) - - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + np.testing.assert_allclose( + out_smr[:5].flatten(), self.smr, atol=ATOL, rtol=RTOL + ) + np.testing.assert_allclose( + out_smr_w[:5].flatten(), self.smr_w, atol=ATOL, rtol=RTOL + ) + np.testing.assert_allclose( + out_smr2[:5].flatten(), self.smr2, atol=ATOL, rtol=RTOL + ) + + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Spatial_Median_Rate_tabular(self): - out_smr = sm.Spatial_Median_Rate(self.df[self.ename], - self.df[self.bname], - self.w).r - out_smr_w = sm.Spatial_Median_Rate(self.df[self.ename], - self.df[self.bname], - self.w, - aw = self.df[self.bname]).r - out_smr2 = sm.Spatial_Median_Rate(self.df[self.ename], - self.df[self.bname], - self.w, - iteration=2).r + out_smr = sm.Spatial_Median_Rate( + self.df[self.ename], self.df[self.bname], self.w + ).r + out_smr_w = sm.Spatial_Median_Rate( + self.df[self.ename], self.df[self.bname], self.w, aw=self.df[self.bname] + ).r + out_smr2 = sm.Spatial_Median_Rate( + self.df[self.ename], self.df[self.bname], self.w, iteration=2 + ).r self.assertIsInstance(out_smr, np.ndarray) self.assertIsInstance(out_smr_w, np.ndarray) self.assertIsInstance(out_smr2, np.ndarray) - np.testing.assert_allclose(out_smr[:5].flatten(), self.smr, - atol=ATOL, rtol=RTOL) - 
np.testing.assert_allclose(out_smr_w[:5].flatten(), self.smr_w, - atol=ATOL, rtol=RTOL) - np.testing.assert_allclose(out_smr2[:5].flatten(), self.smr2, - atol=ATOL, rtol=RTOL) - - out_smr = sm.Spatial_Median_Rate.by_col(self.df, self.ename, - self.bname, self.w) - out_smr_w = sm.Spatial_Median_Rate.by_col(self.df, self.ename, - self.bname, self.w, - aw = self.df[self.bname]) - out_smr2 = sm.Spatial_Median_Rate.by_col(self.df, self.ename, - self.bname, self.w, - iteration=2) - outcol = '{}-{}_spatial_median_rate'.format(self.ename, self.bname) - - np.testing.assert_allclose(out_smr[outcol].values[:5], self.smr, - rtol=RTOL, atol=ATOL) - np.testing.assert_allclose(out_smr_w[outcol].values[:5], self.smr_w, - rtol=RTOL, atol=ATOL) - np.testing.assert_allclose(out_smr2[outcol].values[:5], self.smr2, - rtol=RTOL, atol=ATOL) - - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + np.testing.assert_allclose( + out_smr[:5].flatten(), self.smr, atol=ATOL, rtol=RTOL + ) + np.testing.assert_allclose( + out_smr_w[:5].flatten(), self.smr_w, atol=ATOL, rtol=RTOL + ) + np.testing.assert_allclose( + out_smr2[:5].flatten(), self.smr2, atol=ATOL, rtol=RTOL + ) + + out_smr = sm.Spatial_Median_Rate.by_col(self.df, self.ename, self.bname, self.w) + out_smr_w = sm.Spatial_Median_Rate.by_col( + self.df, self.ename, self.bname, self.w, aw=self.df[self.bname] + ) + out_smr2 = sm.Spatial_Median_Rate.by_col( + self.df, self.ename, self.bname, self.w, iteration=2 + ) + outcol = "{}-{}_spatial_median_rate".format(self.ename, self.bname) + + np.testing.assert_allclose( + out_smr[outcol].values[:5], self.smr, rtol=RTOL, atol=ATOL + ) + np.testing.assert_allclose( + out_smr_w[outcol].values[:5], self.smr_w, rtol=RTOL, atol=ATOL + ) + np.testing.assert_allclose( + out_smr2[outcol].values[:5], self.smr2, rtol=RTOL, atol=ATOL + ) + + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Spatial_Smoother_multicol(self): """ test that specifying multiple columns works correctly. Since the function is shared over all spatial smoothers, we can only test one. 
""" - enames = [self.ename, 'SID79'] - bnames = [self.bname, 'BIR79'] + enames = [self.ename, "SID79"] + bnames = [self.bname, "BIR79"] out_df = sm.Spatial_Median_Rate.by_col(self.df, enames, bnames, self.w) - outcols = ['{}-{}_spatial_median_rate'.format(e,b) - for e,b in zip(enames, bnames)] - smr79 = np.array([0.00122129, 0.00176924, 0.00176924, - 0.00240964, 0.00272035]) + outcols = [ + "{}-{}_spatial_median_rate".format(e, b) for e, b in zip(enames, bnames) + ] + smr79 = np.array( + [0.00122129, 0.00176924, 0.00176924, 0.00240964, 0.00272035] + ) answers = [self.smr, smr79] for col, answer in zip(outcols, answer): self.assertIn(out_df.columns, col) - np.testing.assert_allclose(out_df[col].values[:5], answer, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose( + out_df[col].values[:5], answer, rtol=RTOL, atol=ATOL + ) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Smoother_multicol(self): """ test that non-spatial smoothers work with multicolumn queries """ - enames = [self.ename, 'SID79'] - bnames = [self.bname, 'BIR79'] + enames = [self.ename, "SID79"] + bnames = [self.bname, "BIR79"] out_df = sm.Excess_Risk.by_col(self.df, enames, bnames) - outcols = ['{}-{}_excess_risk'.format(e,b) - for e,b in zip(enames, bnames)] - er79 = np.array([0.000000, 2.796607, 0.8383863, - 1.217479, 0.943811]) + outcols = ["{}-{}_excess_risk".format(e, b) for e, b in zip(enames, bnames)] + er79 = np.array([0.000000, 2.796607, 0.8383863, 1.217479, 0.943811]) answers = [self.er, er79] for col, answer in zip(outcols, answer): self.assertIn(out_df.columns, col) - np.testing.assert_allclose(out_df[col].values[:5], answer, - rtol=RTOL, atol=ATOL) + np.testing.assert_allclose( + out_df[col].values[:5], answer, rtol=RTOL, atol=ATOL + ) + class TestHB(unittest.TestCase): def setUp(self): - sids = libpysal.io.open(libpysal.examples.get_path('sids2.shp'), 'r') + sids = libpysal.io.open(libpysal.examples.get_path("sids2.shp"), "r") self.sids = sids self.d = np.array([i.centroid for i in sids]) self.w = KNN.from_array(self.d, k=5) if not self.w.id_order_set: self.w.id_order = self.w.id_order - sids_db = libpysal.io.open(libpysal.examples.get_path('sids2.dbf'), 'r') + sids_db = libpysal.io.open(libpysal.examples.get_path("sids2.dbf"), "r") self.b, self.e = np.array(sids_db[:, 8]), np.array(sids_db[:, 9]) - self.sids_hb_rr5 = np.array([0.00075586, 0., - 0.0008285, 0.0018315, 0.00498891]) - self.sids_hb_r2r5 = np.array([0.0008285, 0.00084331, - 0.00086896, 0.0018315, 0.00498891]) - self.sids_hb_r3r5 = np.array([0.00091659, 0., - 0.00156838, 0.0018315, 0.00498891]) + self.sids_hb_rr5 = np.array([0.00075586, 0.0, 0.0008285, 0.0018315, 0.00498891]) + self.sids_hb_r2r5 = np.array( + [0.0008285, 0.00084331, 0.00086896, 0.0018315, 0.00498891] + ) + self.sids_hb_r3r5 = np.array( + [0.00091659, 0.0, 0.00156838, 0.0018315, 0.00498891] + ) if not PANDAS_EXTINCT: self.df = sids_db.to_df() - self.ename = 'SID74' - self.bname = 'BIR74' - self.enames = [self.ename, 'SID79'] - self.bnames = [self.bname, 'BIR79'] - self.sids79r = np.array([.000563, .001659, .001879, - .002410, .002720]) - @unittest.skip('Deprecated') + self.ename = "SID74" + self.bname = "BIR74" + self.enames = [self.ename, "SID79"] + self.bnames = [self.bname, "BIR79"] + self.sids79r = np.array([0.000563, 0.001659, 0.001879, 0.002410, 0.002720]) + + @unittest.skip("Deprecated") def test_Headbanging_Triples(self): ht = sm.Headbanging_Triples(self.d, self.w) self.assertEqual(len(ht.triples), 
len(self.d)) ht2 = sm.Headbanging_Triples(self.d, self.w, edgecor=True) - self.assertTrue(hasattr(ht2, 'extra')) + self.assertTrue(hasattr(ht2, "extra")) self.assertEqual(len(ht2.triples), len(self.d)) htr = sm.Headbanging_Median_Rate(self.e, self.b, ht2, iteration=5) self.assertEqual(len(htr.r), len(self.e)) for i in htr.r: self.assertTrue(i is not None) - @unittest.skip('Deprecated') + @unittest.skip("Deprecated") def test_Headbanging_Median_Rate(self): s_ht = sm.Headbanging_Triples(self.d, self.w, k=5) sids_hb_r = sm.Headbanging_Median_Rate(self.e, self.b, s_ht) @@ -304,39 +322,45 @@ def test_Headbanging_Median_Rate(self): sids_hb_r3 = sm.Headbanging_Median_Rate(self.e, self.b, s_ht, aw=self.b) np.testing.assert_array_almost_equal(self.sids_hb_r3r5, sids_hb_r3.r[:5]) - #@unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') - @unittest.skip('Deprecated') + # @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skip("Deprecated") def test_Headbanging_Median_Rate_tabular(self): # test that dataframe columns are treated correctly s_ht = sm.Headbanging_Triples(self.d, self.w, k=5) - sids_hb_r = sm.Headbanging_Median_Rate(self.df[self.ename], - self.df[self.bname], s_ht) + sids_hb_r = sm.Headbanging_Median_Rate( + self.df[self.ename], self.df[self.bname], s_ht + ) self.assertIsInstance(sids_hb_r.r, np.ndarray) np.testing.assert_array_almost_equal(self.sids_hb_rr5, sids_hb_r.r[:5]) - sids_hb_r2 = sm.Headbanging_Median_Rate(self.df[self.ename], - self.df[self.bname], s_ht, - iteration=5) + sids_hb_r2 = sm.Headbanging_Median_Rate( + self.df[self.ename], self.df[self.bname], s_ht, iteration=5 + ) self.assertIsInstance(sids_hb_r2.r, np.ndarray) np.testing.assert_array_almost_equal(self.sids_hb_r2r5, sids_hb_r2.r[:5]) - sids_hb_r3 = sm.Headbanging_Median_Rate(self.df[self.ename], - self.df[self.bname], s_ht, - aw=self.df[self.bname]) + sids_hb_r3 = sm.Headbanging_Median_Rate( + self.df[self.ename], self.df[self.bname], s_ht, aw=self.df[self.bname] + ) self.assertIsInstance(sids_hb_r3.r, np.ndarray) np.testing.assert_array_almost_equal(self.sids_hb_r3r5, sids_hb_r3.r[:5]) - #test that the by col on multiple names works correctly - sids_hr_df = sm.Headbanging_Median_Rate.by_col(self.df, self.enames, - self.bnames, w=self.w) - outcols = ['{}-{}_headbanging_median_rate'.format(e,b) for e,b in - zip(self.enames, self.bnames)] + # test that the by col on multiple names works correctly + sids_hr_df = sm.Headbanging_Median_Rate.by_col( + self.df, self.enames, self.bnames, w=self.w + ) + outcols = [ + "{}-{}_headbanging_median_rate".format(e, b) + for e, b in zip(self.enames, self.bnames) + ] for col, answer in zip(outcols, [self.sids_hb_rr5, self.sids79r]): this_col = sids_hr_df[col].values self.assertIsInstance(this_col, np.ndarray) - np.testing.assert_allclose(sids_hr_df[col][:5], answer, - rtol=RTOL, atol=ATOL*10) + np.testing.assert_allclose( + sids_hr_df[col][:5], answer, rtol=RTOL, atol=ATOL * 10 + ) + class TestKernel_AgeAdj_SM(unittest.TestCase): def setUp(self): @@ -345,8 +369,7 @@ def setUp(self): self.e1 = np.array([10, 8, 1, 4, 3, 5, 4, 3, 2, 1, 5, 3]) self.b1 = np.array([100, 90, 15, 30, 25, 20, 30, 20, 80, 80, 90, 60]) self.s = np.array([98, 88, 15, 29, 20, 23, 33, 25, 76, 80, 89, 66]) - self.points = [( - 10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)] + self.points = [(10, 10), (20, 10), (40, 10), (15, 20), (30, 20), (30, 30)] self.kw = Kernel(self.points) self.points1 = np.array(self.points) + 5 self.points1 = np.vstack((self.points1, self.points)) @@ -355,43 +378,88 @@ 
def setUp(self): self.kw.id_order = list(range(0, len(self.points))) if not PANDAS_EXTINCT: import pandas as pd + dfa = np.array([self.e, self.b]).T dfb = np.array([self.e1, self.b1, self.s]).T - self.dfa = pd.DataFrame(dfa, columns=['e','b']) - self.dfb = pd.DataFrame(dfb, columns=['e', 'b', 's']) - - #answers - self.kernel_exp = [0.10543301, 0.0858573, 0.08256196, 0.09884584, - 0.04756872, 0.04845298] - self.ageadj_exp = [0.10519625, 0.08494318, 0.06440072, 0.06898604, - 0.06952076, 0.05020968] - self.disk_exp = [0.12222222000000001, 0.10833333, 0.08055556, - 0.08944444, 0.09944444, 0.09351852] - self.sf_exp = np.array([ 0.111111, 0.111111, 0.085106, 0.076923]) + self.dfa = pd.DataFrame(dfa, columns=["e", "b"]) + self.dfb = pd.DataFrame(dfb, columns=["e", "b", "s"]) + + # answers + self.kernel_exp = [ + 0.10543301, + 0.0858573, + 0.08256196, + 0.09884584, + 0.04756872, + 0.04845298, + ] + self.ageadj_exp = [ + 0.10519625, + 0.08494318, + 0.06440072, + 0.06898604, + 0.06952076, + 0.05020968, + ] + self.disk_exp = [ + 0.12222222000000001, + 0.10833333, + 0.08055556, + 0.08944444, + 0.09944444, + 0.09351852, + ] + self.sf_exp = np.array([0.111111, 0.111111, 0.085106, 0.076923]) def test_Kernel_Smoother(self): kr = sm.Kernel_Smoother(self.e, self.b, self.kw) np.testing.assert_allclose(kr.r.flatten(), self.kernel_exp) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Kernel_Smoother_tabular(self): dfa, dfb = self.dfa, self.dfb - kr = sm.Kernel_Smoother(dfa['e'], dfa['b'], self.kw) + kr = sm.Kernel_Smoother(dfa["e"], dfa["b"], self.kw) np.testing.assert_allclose(kr.r.flatten(), kernel_exp) - kr = sm.Kernel_Smoother.by_col(dfa, 'e', 'b', w=self.kw) - colname = 'e_b_kernel_smoother' + kr = sm.Kernel_Smoother.by_col(dfa, "e", "b", w=self.kw) + colname = "e_b_kernel_smoother" np.testing.assert_allclose(kr[colname].values, kernel_exp) - kr = sm.Kernel_Smoother.by_col(dfb, ['e', 's'], 'b', w=self.kw) - outcols = ['{}-b_kernel_smoother'.format(l) for l in ['e','s']] - - exp_eb = np.array([ 0.08276363, 0.08096262, 0.03636364, 0.0704302 , - 0.07996067, 0.1287226 , 0.09831286, 0.0952105 , - 0.02857143, 0.06671039, 0.07129231, 0.08078792]) - exp_sb = np.array([ 1.00575463, 0.99597005, 0.96363636, 0.99440132, - 0.98468399, 1.07912333, 1.03376267, 1.02759815, - 0.95428572, 0.99716186, 0.98277235, 1.03906155]) + kr = sm.Kernel_Smoother.by_col(dfb, ["e", "s"], "b", w=self.kw) + outcols = ["{}-b_kernel_smoother".format(l) for l in ["e", "s"]] # noqa E741 + + exp_eb = np.array( + [ + 0.08276363, + 0.08096262, + 0.03636364, + 0.0704302, + 0.07996067, + 0.1287226, + 0.09831286, + 0.0952105, + 0.02857143, + 0.06671039, + 0.07129231, + 0.08078792, + ] + ) + exp_sb = np.array( + [ + 1.00575463, + 0.99597005, + 0.96363636, + 0.99440132, + 0.98468399, + 1.07912333, + 1.03376267, + 1.02759815, + 0.95428572, + 0.99716186, + 0.98277235, + 1.03906155, + ] + ) for name, answer in zip(outcols, [exp_eb, exp_sb]): np.testing.assert_allclose(kr[name].values, answer, rtol=RTOL, atol=ATOL) @@ -399,35 +467,37 @@ def test_Age_Adjusted_Smoother(self): ar = sm.Age_Adjusted_Smoother(self.e1, self.b1, self.kw, self.s) np.testing.assert_allclose(ar.r, self.ageadj_exp) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Age_Adjusted_Smoother_tabular(self): dfb = self.dfb kr = sm.Age_Adjusted_Smoother(dfb.e, dfb.b, s=dfb.s, w=self.kw) self.assertIsInstance(kr.r, np.ndarray) np.testing.assert_allclose(kr.r, 
self.ageadj_exp) - kr = sm.Age_Adjusted_Smoother.by_col(dfb, 'e', 'b', w=self.kw, s='s') - answer = np.array([ 0.10519625, 0.08494318, 0.06440072, - 0.06898604, 0.06952076, 0.05020968]) - colname = 'e-b_age_adjusted_smoother' + kr = sm.Age_Adjusted_Smoother.by_col(dfb, "e", "b", w=self.kw, s="s") + answer = np.array( + [0.10519625, 0.08494318, 0.06440072, 0.06898604, 0.06952076, 0.05020968] + ) + colname = "e-b_age_adjusted_smoother" np.testing.assert_allclose(kr[colname].values, answer, rtol=RTOL, atol=ATOL) def test_Disk_Smoother(self): - self.kw.transform = 'b' + self.kw.transform = "b" disk = sm.Disk_Smoother(self.e, self.b, self.kw) np.testing.assert_allclose(disk.r.flatten(), self.disk_exp) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Disk_Smoother_tabular(self): - self.kw.transform = 'b' + self.kw.transform = "b" dfa = self.dfa disk = sm.Disk_Smoother(dfa.e, dfa.b, self.kw).r np.testing.assert_allclose(disk.flatten(), self.disk_exp) - disk = sm.Disk_Smoother.by_col(dfa, 'e', 'b', self.kw) - col = 'e-b_disk_smoother' - np.testing.assert_allclose(disk[col].values, self.disk_exp, - rtol=RTOL, atol=ATOL) + disk = sm.Disk_Smoother.by_col(dfa, "e", "b", self.kw) + col = "e-b_disk_smoother" + np.testing.assert_allclose( + disk[col].values, self.disk_exp, rtol=RTOL, atol=ATOL + ) def test_Spatial_Filtering(self): points = np.array(self.points) @@ -435,35 +505,56 @@ def test_Spatial_Filtering(self): sf = sm.Spatial_Filtering(bbox, points, self.e, self.b, 2, 2, r=30) np.testing.assert_allclose(sf.r, self.sf_exp, rtol=RTOL, atol=ATOL) - @unittest.skipIf(PANDAS_EXTINCT, 'missing pandas') + @unittest.skipIf(PANDAS_EXTINCT, "missing pandas") def test_Kernel_Smoother_tabular(self): point_array = np.array(self.points) - bbox = [[0,0] , [45,45]] + bbox = [[0, 0], [45, 45]] dfa = self.dfa sf = sm.Spatial_Filtering(bbox, point_array, dfa.e, dfa.b, 2, 2, r=30) np.testing.assert_allclose(sf.r, self.sf_exp, rtol=RTOL, atol=ATOL) - dfa['geometry'] = self.points - sf = sm.Spatial_Filtering.by_col(dfa, 'e', 'b', 3, 3, r=30) - r_answer = np.array([ 0.07692308, 0.07213115, 0.07213115, 0.07692308, - 0.07692308, 0.07692308, 0.07692308, 0.07692308, - 0.07692308]) + dfa["geometry"] = self.points + sf = sm.Spatial_Filtering.by_col(dfa, "e", "b", 3, 3, r=30) + r_answer = np.array( + [ + 0.07692308, + 0.07213115, + 0.07213115, + 0.07692308, + 0.07692308, + 0.07692308, + 0.07692308, + 0.07692308, + 0.07692308, + ] + ) x_answer = np.array([10.0, 10.0, 10.0, 20.0, 20.0, 20.0, 30.0, 30.0, 30.0]) - y_answer = np.array([10.000000, 16.666667, 23.333333, - 10.000000, 16.666667, 23.333333, - 10.000000, 16.666667, 23.333333]) - columns = ['e-b_spatial_filtering_{}'.format(name) for name in ['X', 'Y', 'R']] + y_answer = np.array( + [ + 10.000000, + 16.666667, + 23.333333, + 10.000000, + 16.666667, + 23.333333, + 10.000000, + 16.666667, + 23.333333, + ] + ) + columns = ["e-b_spatial_filtering_{}".format(name) for name in ["X", "Y", "R"]] for col, answer in zip(columns, [x_answer, y_answer, r_answer]): np.testing.assert_allclose(sf[col].values, answer, rtol=RTOL, atol=ATOL) + class TestUtils(unittest.TestCase): def test_sum_by_n(self): d = np.array([10, 9, 20, 30]) w = np.array([0.5, 0.1, 0.3, 0.8]) n = 2 - exp_sum = np.array([5.9, 30.]) + exp_sum = np.array([5.9, 30.0]) np.testing.assert_array_almost_equal(exp_sum, sm.sum_by_n(d, w, n)) def test_standardized_mortality_ratio(self): @@ -473,8 +564,9 @@ def test_standardized_mortality_ratio(self): s_b = 
np.array([1000, 900, 1000, 900, 1000, 900, 1000, 900]) n = 2 exp_smr = np.array([2.48691099, 2.73684211]) - np.testing.assert_array_almost_equal(exp_smr, - sm.standardized_mortality_ratio(e, b, s_e, s_b, n)) + np.testing.assert_array_almost_equal( + exp_smr, sm.standardized_mortality_ratio(e, b, s_e, s_b, n) + ) def test_choynowski(self): e = np.array([30, 25, 25, 15, 33, 21, 30, 20]) @@ -486,19 +578,24 @@ def test_choynowski(self): def test_assuncao_rate(self): e = np.array([30, 25, 25, 15, 33, 21, 30, 20]) b = np.array([100, 100, 110, 90, 100, 90, 110, 90]) - exp_assuncao = np.array([1.03843594, -0.04099089, -0.56250375, - -1.73061861]) - np.testing.assert_array_almost_equal( - exp_assuncao, sm.assuncao_rate(e, b)[:4]) + exp_assuncao = np.array([1.03843594, -0.04099089, -0.56250375, -1.73061861]) + np.testing.assert_array_almost_equal(exp_assuncao, sm.assuncao_rate(e, b)[:4]) suite = unittest.TestSuite() -test_classes = [TestFlatten, TestWMean, TestAgeStd, TestSRate, TestHB, - TestKernel_AgeAdj_SM, TestUtils] +test_classes = [ + TestFlatten, + TestWMean, + TestAgeStd, + TestSRate, + TestHB, + TestKernel_AgeAdj_SM, + TestUtils, +] for i in test_classes: a = unittest.TestLoader().loadTestsFromTestCase(i) suite.addTest(a) -if __name__ == '__main__': +if __name__ == "__main__": runner = unittest.TextTestRunner() runner.run(suite) diff --git a/esda/tests/test_topo.py b/esda/tests/test_topo.py index b2bfe95a..f4c002ca 100644 --- a/esda/tests/test_topo.py +++ b/esda/tests/test_topo.py @@ -1,7 +1,9 @@ -import numpy, copy, pandas from unittest import TestCase -from ..topo import prominence, isolation, to_elevation, weights +import numpy +import pandas + +from ..topo import isolation, prominence, to_elevation, weights class TopoTester(TestCase): @@ -71,8 +73,9 @@ def test_isolation_valid(self): assert numpy.isnan(iso.loc[4, "parent_rank"]) assert (iso.dropna().parent_index == 4).all() assert ( - iso.sort_values("marks", ascending=False).index - == iso.sort_values("rank").index + iso.sort_values("marks", ascending=False).index == ( + iso.sort_values("rank").index + ) ).all() assert iso.loc[3, "isolation"] == 1.5 assert iso.loc[2, "gap"] == ( @@ -88,8 +91,9 @@ def test_isolation_valid(self): assert numpy.isnan(iso.loc[3, "parent_index"]) assert (iso.dropna().parent_index == [4, 2, 5, 5, 3]).all() assert ( - iso.sort_values("marks", ascending=False).index - == iso.sort_values("rank").index + iso.sort_values("marks", ascending=False).index == ( + iso.sort_values("rank").index + ) ).all() assert iso.loc[1, "isolation"] == 1 assert iso.loc[2, "gap"] == ( diff --git a/esda/tests/test_util.py b/esda/tests/test_util.py index 97e3410c..88d4035d 100644 --- a/esda/tests/test_util.py +++ b/esda/tests/test_util.py @@ -1,9 +1,10 @@ import unittest + import libpysal -from .. import util -from .. import moran import numpy as np +from .. 
import moran, util + class Fdr_Tester(unittest.TestCase): def setUp(self): diff --git a/esda/topo.py b/esda/topo.py index ff99b9d9..1c96b41d 100644 --- a/esda/topo.py +++ b/esda/topo.py @@ -1,11 +1,12 @@ -import numpy, pandas +import numpy +import pandas +from libpysal import weights from scipy.spatial import distance -from sklearn.utils import check_array from scipy.stats import mode as most_common_value -from libpysal import weights +from sklearn.utils import check_array try: - from tqdm import tqdm as _tqdm + from tqdm import tqdm HAS_TQDM = True except ImportError: @@ -18,27 +19,25 @@ def _passthrough(sequence): def _resolve_metric(X, coordinates, metric): """ - Provide a distance function that you can use to find the distance betwen arbitrary points. + Provide a distance function that you can use + to find the distance betwen arbitrary points. """ if callable(metric): distance_func = metric elif metric.lower() == "haversine": try: from numba import autojit - except: + except ImportError: def autojit(func): return func @autojit def harcdist(p1, p2): - """ Compute the kernel of haversine""" + """Compute the kernel of haversine""" x = numpy.sin(p2[1] - p1[1] / 2) ** 2 - y = ( - numpy.cos(p2[1]) - * numpy.cos(p1[1]) - * numpy.sin((p2[0] - p1[0]) / 2) ** 2 - ) + cosp1, cosp2 = numpy.cos(p1[1]), numpy.cos(p2[1]) + y = cosp2 * cosp1 * numpy.sin((p2[0] - p1[0]) / 2) ** 2 return 2 * numpy.arcsin(numpy.sqrt(x + y)) distance_func = harcdist @@ -54,7 +53,7 @@ def harcdist(p1, p2): ) def lookup_distance(a, b): - """ Find location of points a,b in X and return precomputed distances""" + """Find location of points a,b in X and return precomputed distances""" (aloc,) = (X == a).all(axis=1).nonzero() (bloc,) = (X == b).all(axis=1).nonzero() if (len(aloc) > 1) or (len(bloc) > 1): @@ -99,8 +98,9 @@ def isolation( (N, p) array of data to use as input. If p > 1, the "elevation" is computed using the topo.to_elevation function. coordinates : numpy.ndarray - (N,k) array of locations for X to compute distances. If metric='precomputed', this - should contain the distances from each point to every other point, and k == N. + (N,k) array of locations for X to compute distances. If + metric='precomputed', this should contain the distances from + each point to every other point, and k == N. metric : string or callable (default: 'euclidean') name of distance metric in scipy.spatial.distance, or function, that can be used to compute distances between locations. If 'precomputed', ad-hoc function @@ -134,10 +134,7 @@ def isolation( if progressbar and HAS_TQDM: pbar = tqdm elif progressbar and (not HAS_TQDM): - try: - import tqdm - except ImportError as e: - raise ImportError("the tqdm module is required for progressbars") + raise ImportError("The `tqdm` module is required for progressbars.") else: pbar = _passthrough @@ -202,8 +199,8 @@ def prominence( Returns ------- - the prominence of each observation in X, possibly along with the set of saddle points, - peaks, and/or dominating peak tree. + the prominence of each observation in X, possibly along with the + set of saddle points, peaks, and/or dominating peak tree. 
Notes ----- @@ -242,10 +239,7 @@ def prominence( if progressbar and HAS_TQDM: pbar = tqdm elif progressbar and (not HAS_TQDM): - try: - import tqdm - except ImportError as e: - raise ImportError("the tqdm module is required for progressbars") + raise ImportError("The `tqdm` module is required for progressbars.") else: pbar = _passthrough @@ -260,9 +254,10 @@ def prominence( msg = "assessing {} (rank: {}, value: {})".format(this_full_ix, rank, value) # use the dominating_peak vector. A new obs either has: - # 1. neighbors whose dominating_peak are all -1 (new peak) - # 2. neighbors whose dominating_peak are all -1 or an integer (slope of current peak) - # 3. neighbors whose dominating_peak include at least two integers and any -1 (key col) + # Neighbors whose dominating_peak... + # 1. ... are all -1 (new peak) + # 2. ... are all -1 or an integer (slope of current peak) + # 3. ... include at least two integers and any -1 (key col) _, neighbs = connectivity[this_full_ix].toarray().nonzero() this_preds = predecessors[neighbs] @@ -336,23 +331,24 @@ def prominence( if verbose: print( - "--------------------------------------------\nat the {}" - " iteration:\n{}\n\tpeaks\t{}\n\tprominence\t{}\n\tkey_cols\t{}\n" - .format(rank, msg, peaks, prominence, key_cols) + ( + "--------------------------------------------\n" + "at the {rank} iteration:\n{msg}\n\tpeaks\t{peaks}\n" + "\tprominence\t{prominence}\n\tkey_cols\t{key_cols}\n" + ) ) if gdf is not None: peakframe = gdf.iloc[peaks] keycolframe = gdf.iloc[list(key_cols.values())] - slopeframe = gdf[ - (~(gdf.index.isin(peakframe.index) | gdf.index.isin(keycolframe.index))) - & mask - ] + _isin_peak = gdf.index.isin(peakframe.index) + _isin_keycol = gdf.index.isin(keycolframe.index) + slopeframe = gdf[~(_isin_peak | _isin_keycol) & mask] rest = gdf[~mask] this_geom = gdf.iloc[[this_full_ix]] - ax = rest.plot(edgecolor="k", linewidth=0.1, facecolor="lightblue") - ax = slopeframe.plot(edgecolor="k", linewidth=0.1, facecolor="linen", ax=ax) - ax = keycolframe.plot(edgecolor="k", linewidth=0.1, facecolor="red", ax=ax) - ax = peakframe.plot(edgecolor="k", linewidth=0.1, facecolor="yellow", ax=ax) + ax = rest.plot(edgecolor="k", lw=0.1, fc="lightblue") + ax = slopeframe.plot(ec="k", lw=0.1, fc="linen", ax=ax) + ax = keycolframe.plot(ec="k", lw=0.1, fc="red", ax=ax) + ax = peakframe.plot(ec="k", lw=0.1, fc="yellow", ax=ax) ax = this_geom.centroid.plot(ax=ax, color="orange", marker="*") plt.show() command = input() @@ -391,14 +387,16 @@ def to_elevation(X, middle="mean", metric="euclidean"): X : numpy.ndarray Array of values for which to compute elevation. middle : callable or string - name of function in numpy (or function itself) used to compute the center point of X + name of function in numpy (or function itself) + used to compute the center point of X metric : string - metric to use in `scipy.spatial.distance.cdist` to compute the distance from the center - of mass to the point. + metric to use in `scipy.spatial.distance.cdist` to + compute the distance from the center of mass to the point. Returns -------- - (N,1)-shaped numpy array containing the "elevation" of each point relative to sea level (zero). + (N,1)-shaped numpy array containing the "elevation" of + each point relative to sea level (zero). """ if X.ndim == 1: @@ -429,7 +427,7 @@ def _check_connectivity(connectivity_or_coordinates): 3. a set of coordinates that we need to build the delaunay triangulation and then return the graph. 
""" - from scipy.sparse import issparse, csc_matrix + from scipy.sparse import issparse if issparse(connectivity_or_coordinates): shape = connectivity_or_coordinates.shape @@ -446,9 +444,10 @@ def _check_connectivity(connectivity_or_coordinates): if __name__ == "__main__": - import geopandas, pandas - from libpysal import weights, examples + import geopandas import matplotlib.pyplot as plt + import pandas # noqa F811 + from libpysal import examples, weights # noqa F811 from matplotlib import cm current_cmap = cm.get_cmap() diff --git a/setup.cfg b/setup.cfg index ac51808d..d93df069 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,3 +5,16 @@ versionfile_source = esda/_version.py versionfile_build = esda/_version.py tag_prefix = v parentdir_prefix = esda- + +[flake8] +max_line_length = 88 +exclude = docs/conf.py, versioneer.py, esda/_version.py +ignore = + F403, + F405 + +[black] +line-length = 88 + +[isort] +profile = black \ No newline at end of file diff --git a/setup.py b/setup.py index 1181f32a..71c20ac1 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,9 @@ # coding: utf-8 -from setuptools import setup, find_packages -from distutils.command.build_py import build_py import os +from distutils.command.build_py import build_py + +from setuptools import find_packages, setup + import versioneer package = "esda" diff --git a/versioneer.py b/versioneer.py index 97130070..e283ecbf 100644 --- a/versioneer.py +++ b/versioneer.py @@ -1,4 +1,3 @@ - # Version: 0.20 """The Versioneer - like a rocketeer, but for versions. @@ -303,11 +302,13 @@ def get_root(): setup_py = os.path.join(root, "setup.py") versioneer_py = os.path.join(root, "versioneer.py") if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - err = ("Versioneer was unable to run the project root directory. " - "Versioneer requires setup.py to be executed from " - "its immediate directory (like 'python setup.py COMMAND'), " - "or in a way that lets it use sys.argv[0] to find the root " - "(like 'python path/to/setup.py COMMAND').") + err = ( + "Versioneer was unable to run the project root directory. " + "Versioneer requires setup.py to be executed from " + "its immediate directory (like 'python setup.py COMMAND'), " + "or in a way that lets it use sys.argv[0] to find the root " + "(like 'python path/to/setup.py COMMAND')." 
+ ) raise VersioneerBadRootError(err) try: # Certain runtime workflows (setup.py install/develop in a setuptools @@ -320,8 +321,10 @@ def get_root(): me_dir = os.path.normcase(os.path.splitext(my_path)[0]) vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) if me_dir != vsr_dir: - print("Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(my_path), versioneer_py)) + print( + "Warning: build in %s is using versioneer.py from %s" + % (os.path.dirname(my_path), versioneer_py) + ) except NameError: pass return root @@ -367,16 +370,17 @@ class NotThisMethod(Exception): def register_vcs_handler(vcs, method): # decorator """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): """Store f in HANDLERS[vcs][method].""" HANDLERS.setdefault(vcs, {})[method] = f return f + return decorate # pylint:disable=too-many-arguments,consider-using-with # noqa -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) process = None @@ -384,10 +388,13 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, try: dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + process = subprocess.Popen( + [command] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) break except EnvironmentError: e = sys.exc_info()[1] @@ -410,7 +417,9 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, return stdout, process.returncode -LONG_VERSION_PY['git'] = r''' +LONG_VERSION_PY[ + "git" +] = r''' # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build @@ -1091,7 +1100,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1100,7 +1109,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -1108,24 +1117,31 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): for ref in sorted(tags): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] + r = ref[len(tag_prefix) :] # Filter out refs that exactly match prefix or that don't start # with a number once the prefix is stripped (mostly a concern # when prefix is '') - if not re.match(r'\d', r): + if not re.match(r"\d", r): continue if verbose: print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } @register_vcs_handler("git", "pieces_from_vcs") @@ -1140,8 +1156,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -1149,10 +1164,19 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) + describe_out, rc = runner( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + "%s*" % tag_prefix, + ], + cwd=root, + ) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -1167,8 +1191,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) # --abbrev-ref was added in git-1.6.3 if rc != 0 or branch_name is None: raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") @@ -1208,17 +1231,16 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] + git_describe = git_describe[: git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? 
- pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out return pieces # tag @@ -1227,10 +1249,12 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] + pieces["closest-tag"] = full_tag[len(tag_prefix) :] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) @@ -1303,15 +1327,21 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @@ -1340,11 +1370,13 @@ def versions_from_file(filename): contents = f.read() except EnvironmentError: raise NotThisMethod("unable to read _version.py") - mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) + mo = re.search( + r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S + ) if not mo: - mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) + mo = re.search( + r"version_json = '''\r\n(.*)''' # END VERSION_JSON", contents, re.M | re.S + ) if not mo: raise NotThisMethod("no version_json in _version.py") return json.loads(mo.group(1)) @@ -1353,8 +1385,7 @@ def versions_from_file(filename): def write_to_version_file(filename, versions): """Write the given version number to the given _version.py file.""" os.unlink(filename) - contents = json.dumps(versions, sort_keys=True, - indent=1, separators=(",", ": ")) + contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": ")) with open(filename, "w") as f: f.write(SHORT_VERSION_PY % contents) @@ -1386,8 +1417,7 @@ def render_pep440(pieces): rendered += ".dirty" else: # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -1416,8 +1446,7 @@ def render_pep440_branch(pieces): rendered = "0" if pieces["branch"] != "master": rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -1560,11 +1589,13 @@ def render_git_describe_long(pieces): def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - 
"date": None} + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } if not style or style == "default": style = "pep440" # the default @@ -1588,9 +1619,13 @@ def render(pieces, style): else: raise ValueError("unknown style '%s'" % style) - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } class VersioneerBadRootError(Exception): @@ -1613,8 +1648,9 @@ def get_versions(verbose=False): handlers = HANDLERS.get(cfg.VCS) assert handlers, "unrecognized VCS '%s'" % cfg.VCS verbose = verbose or cfg.verbose - assert cfg.versionfile_source is not None, \ - "please set versioneer.versionfile_source" + assert ( + cfg.versionfile_source is not None + ), "please set versioneer.versionfile_source" assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" versionfile_abs = os.path.join(root, cfg.versionfile_source) @@ -1668,9 +1704,13 @@ def get_versions(verbose=False): if verbose: print("unable to compute version") - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, "error": "unable to compute version", - "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } def get_version(): @@ -1723,6 +1763,7 @@ def run(self): print(" date: %s" % vers.get("date")) if vers["error"]: print(" error: %s" % vers["error"]) + cmds["version"] = cmd_version # we override "build_py" in both distutils and setuptools @@ -1741,8 +1782,8 @@ def run(self): # setup.py egg_info -> ? # we override different "build_py" commands for both environments - if 'build_py' in cmds: - _build_py = cmds['build_py'] + if "build_py" in cmds: + _build_py = cmds["build_py"] elif "setuptools" in sys.modules: from setuptools.command.build_py import build_py as _build_py else: @@ -1757,14 +1798,14 @@ def run(self): # now locate _version.py in the new build/ directory and replace # it with an updated value if cfg.versionfile_build: - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) + target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) + cmds["build_py"] = cmd_build_py - if 'build_ext' in cmds: - _build_ext = cmds['build_ext'] + if "build_ext" in cmds: + _build_ext = cmds["build_ext"] elif "setuptools" in sys.modules: from setuptools.command.build_ext import build_ext as _build_ext else: @@ -1784,14 +1825,15 @@ def run(self): return # now locate _version.py in the new build/ directory and replace # it with an updated value - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) + target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) + cmds["build_ext"] = cmd_build_ext if "cx_Freeze" in sys.modules: # cx_freeze enabled? from cx_Freeze.dist import build_exe as _build_exe + # nczeczulin reports that py2exe won't like the pep440-style string # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. 
# setup(console=[{ @@ -1812,17 +1854,21 @@ def run(self): os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + cmds["build_exe"] = cmd_build_exe del cmds["build_py"] - if 'py2exe' in sys.modules: # py2exe enabled? + if "py2exe" in sys.modules: # py2exe enabled? from py2exe.distutils_buildexe import py2exe as _py2exe class cmd_py2exe(_py2exe): @@ -1838,18 +1884,22 @@ def run(self): os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + cmds["py2exe"] = cmd_py2exe # we override different "sdist" commands for both environments - if 'sdist' in cmds: - _sdist = cmds['sdist'] + if "sdist" in cmds: + _sdist = cmds["sdist"] elif "setuptools" in sys.modules: from setuptools.command.sdist import sdist as _sdist else: @@ -1874,8 +1924,10 @@ def make_release_tree(self, base_dir, files): # updated value target_versionfile = os.path.join(base_dir, cfg.versionfile_source) print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, - self._versioneer_generated_versions) + write_to_version_file( + target_versionfile, self._versioneer_generated_versions + ) + cmds["sdist"] = cmd_sdist return cmds @@ -1935,11 +1987,13 @@ def do_setup(): root = get_root() try: cfg = get_config_from_root(root) - except (EnvironmentError, configparser.NoSectionError, - configparser.NoOptionError) as e: + except ( + EnvironmentError, + configparser.NoSectionError, + configparser.NoOptionError, + ) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): - print("Adding sample versioneer config to setup.cfg", - file=sys.stderr) + print("Adding sample versioneer config to setup.cfg", file=sys.stderr) with open(os.path.join(root, "setup.cfg"), "a") as f: f.write(SAMPLE_CONFIG) print(CONFIG_ERROR, file=sys.stderr) @@ -1948,15 +2002,18 @@ def do_setup(): print(" creating %s" % cfg.versionfile_source) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), - "__init__.py") + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: with open(ipy, "r") as f: @@ -2004,8 +2061,10 @@ def do_setup(): else: print(" 'versioneer.py' already in MANIFEST.in") if cfg.versionfile_source not in simple_includes: - print(" appending versionfile_source 
('%s') to MANIFEST.in" % - cfg.versionfile_source) + print( + " appending versionfile_source ('%s') to MANIFEST.in" + % cfg.versionfile_source + ) with open(manifest_in, "a") as f: f.write("include %s\n" % cfg.versionfile_source) else: From d10e9eae38f274efc4a1748d4e285de1961eeabd Mon Sep 17 00:00:00 2001 From: James Gaboardi Date: Mon, 21 Nov 2022 17:22:49 -0500 Subject: [PATCH 2/2] minor typos --- esda/geary_local.py | 2 +- esda/join_counts_local_mv.py | 19 ++++++++----------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/esda/geary_local.py b/esda/geary_local.py index 4d432d4f..728f316d 100644 --- a/esda/geary_local.py +++ b/esda/geary_local.py @@ -65,7 +65,7 @@ def __init__( island_weight : value to use as a weight for the "fake" neighbor for every island. If numpy.nan, will propagate to the final local statistic depending - on the `stat_func`. If 0, thenthe lag is always zero for islands. + on the `stat_func`. If 0, then the lag is always zero for islands. Attributes ---------- diff --git a/esda/join_counts_local_mv.py b/esda/join_counts_local_mv.py index 0be44d98..0f52b718 100644 --- a/esda/join_counts_local_mv.py +++ b/esda/join_counts_local_mv.py @@ -31,25 +31,22 @@ def __init__( the relationships between observed units. Need not be row-standardized. permutations : int - number of random permutations for calculation of pseudo - p_values + number of random permutations for calculation of pseudo p_values n_jobs : int Number of cores to be used in the conditional randomisation. If -1, all available cores are used. - keep_simulations : boll - (default=True) - If True, the entire matrix of replications under the null - is stored in memory and accessible; otherwise, replications - are not saved - seed : None/int + keep_simulations : bool (default True) + If ``True``, the entire matrix of replications under the null is stored + in memory and accessible; otherwise, replications are not saved + seed : int (default None) Seed to ensure reproducibility of conditional randomizations. Must be set here, and not outside of the function, since numba does not correctly interpret external seeds nor numpy.random.RandomState instances. - island_weight: + island_weight : int or float (default 0) value to use as a weight for the "fake" neighbor for every island. - If numpy.nan, will propagate to the final local statistic depending - on the `stat_func`. If 0, then the lag is always zero for islands. + If ``numpy.nan``, will propagate to the final local statistic depending + on the ``stat_func``. If ``0``, then the lag is always zero for islands. """
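
For reference, a rough usage sketch of the estimator whose parameters are documented in the hunk above. The constructor arguments come straight from the docstring in this patch; the `fit` call and the `LJC`/`p_sim` result attributes are assumptions based on the other local join count estimators in `esda` (they are not part of this diff), so check them against the class itself before relying on them.

    import numpy as np
    from libpysal import weights
    from esda.join_counts_local_mv import Join_Counts_Local_MV

    # 5x5 rook lattice -> 25 observations with a simple contiguity structure
    w = weights.lat2W(5, 5)

    # three binary variables, as expected by the multivariate local join counts
    rng = np.random.default_rng(42)
    x = rng.integers(0, 2, 25)
    y = rng.integers(0, 2, 25)
    z = rng.integers(0, 2, 25)

    # parameters as documented in the docstring above; seed fixed so the
    # conditional permutations are reproducible
    ljc_mv = Join_Counts_Local_MV(
        connectivity=w,
        permutations=999,
        n_jobs=1,
        keep_simulations=True,
        seed=12345,
        island_weight=0,
    )
    ljc_mv.fit([x, y, z])    # assumed sklearn-style fit(variables) interface

    print(ljc_mv.LJC[:5])    # local join counts (assumed attribute name)
    print(ljc_mv.p_sim[:5])  # pseudo p-values (assumed attribute name)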