Skip to content

graph.Graph build_* constructors #542

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Aug 7, 2023
2 changes: 1 addition & 1 deletion libpysal/graph/_contiguity.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from ._utils import _neighbor_dict_to_edges, _validate_geometry_input

_VALID_GEOMETRY_TYPES = ("Polygon", "MultiPolygon", "LineString", "MultiLineString")
_VALID_GEOMETRY_TYPES = ["Polygon", "MultiPolygon", "LineString", "MultiLineString"]


def _vertex_set_intersection(geoms, rook=True, ids=None, by_perimeter=False):
Expand Down
49 changes: 15 additions & 34 deletions libpysal/graph/_kernel.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import numpy
import scipy
from scipy import sparse, optimize, spatial, stats

from ._utils import _validate_geometry_input

_VALID_GEOMETRY_TYPES = ("Point",)
_VALID_GEOMETRY_TYPES = ["Point"]


def _triangular(distances, bandwidth):
Expand Down Expand Up @@ -53,7 +53,7 @@ def _identity(distances, bandwidth):
}


def kernel(
def _kernel(
coordinates,
bandwidth=None,
metric="euclidean",
Expand Down Expand Up @@ -107,7 +107,7 @@ def kernel(

"""
coordinates, ids, geoms = _validate_geometry_input(
coordinates, ids=ids, valid_geom_types=_VALID_GEOMETRY_TYPES
coordinates, ids=ids, valid_geometry_types=_VALID_GEOMETRY_TYPES
)
if metric == "precomputed":
assert (
Expand All @@ -130,18 +130,18 @@ def kernel(
D_linear_flat = D_linear.flatten()
if metric == "haversine":
D_linear_flat * 6371 # express haversine distances in kilometers
D = scipy.sparse.csc_array(
D = sparse.csc_array(
(D_linear_flat, (self_ix_flat, neighbor_ix_flat)),
shape=(n_samples, n_samples),
)
else:
D = coordinates * (coordinates.argsort(axis=1, kind="stable") < (k + 1))
else:
if metric != "precomputed":
D = scipy.spatial.distance.pdist(coordinates, metric=metric)
D = scipy.sparse.csc_array(scipy.spatial.distance.squareform(D))
D = spatial.distance.pdist(coordinates, metric=metric)
D = sparse.csc_array(spatial.distance.squareform(D))
else:
D = scipy.sparse.csc_array(coordinates)
D = sparse.csc_array(coordinates)
if bandwidth is None:
bandwidth = numpy.percentile(D.data, 25)
elif bandwidth == "opt":
Expand All @@ -150,32 +150,11 @@ def kernel(
smooth = kernel(D.data, bandwidth)
else:
smooth = _kernel_functions[kernel](D.data, bandwidth)
return scipy.sparse.csc_array((smooth, D.indices, D.indptr), dtype=smooth.dtype)

sp = sparse.csc_array((smooth, D.indices, D.indptr), dtype=smooth.dtype)
sp.eliminate_zeros()

def knn(
coordinates,
metric="euclidean",
k=2,
ids=None,
p=2,
function="boxcar",
bandwidth=numpy.inf,
):
"""
Compute a K-nearest neighbor weight. Uses kernel() with a kernel="boxcar"
and bandwidth=numpy.inf by default. Consult kernel() for further argument
specifications.
"""
return kernel(
coordinates,
metric=metric,
k=k,
ids=ids,
p=p,
function=function,
bandwidth=bandwidth,
)
return sp, ids


def _prepare_tree_query(coordinates, metric, p=2):
Expand Down Expand Up @@ -223,7 +202,9 @@ def _optimize_bandwidth(D, kernel):
def _loss(bandwidth, D=D, kernel_function=kernel_function):
Ku = kernel_function(D.data, bandwidth)
bins, _ = numpy.histogram(Ku, bins=int(D.shape[0] ** 0.5), range=(0, 1))
return -scipy.stats.entropy(bins / bins.sum())
return -stats.entropy(bins / bins.sum())

xopt = minimize_scalar(_loss, bounds=(0, D.data.max() * 2), method="bounded")
xopt = optimize.minimize_scalar(
_loss, bounds=(0, D.data.max() * 2), method="bounded"
)
return xopt.x
155 changes: 73 additions & 82 deletions libpysal/graph/_triangulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,20 @@

import numpy
import pandas
from scipy.spatial import Delaunay as _Delaunay
from scipy import spatial

from ._contiguity import _queen, _rook, _vertex_set_intersection
from ._contiguity import _vertex_set_intersection
from ._kernel import _kernel_functions
from ._utils import _validate_geometry_input
from .base import Graph

from libpysal.cg import voronoi_frames

try:
from numba import njit
from numba import njit # noqa E401
except ModuleNotFoundError:
from libpysal.common import jit as njit

_VALID_GEOMETRY_TYPES = ("Point")
_VALID_GEOMETRY_TYPES = ["Point"]

__author__ = """"
Levi John Wolf ([email protected])
Expand All @@ -25,7 +24,7 @@
"""


def delaunay(coordinates, ids=None, bandwidth=numpy.inf, kernel="boxcar"):
def _delaunay(coordinates, ids=None, bandwidth=numpy.inf, kernel="boxcar"):
"""
Constructor of the Delaunay graph of a set of input points.
Relies on scipy.spatial.Delaunay and numba to quickly construct
Expand Down Expand Up @@ -73,42 +72,38 @@ def delaunay(coordinates, ids=None, bandwidth=numpy.inf, kernel="boxcar"):
"""

try:
from numba import njit
from numba import njit # noqa E401
except ModuleNotFoundError:
warnings.warn(
"The numba package is åused extensively in this module"
"The numba package is used extensively in this module"
" to accelerate the computation of graphs. Without numba,"
" these computations may become unduly slow on large data."
)
# undefined: geoms, changing to coordinates (SR)
coordinates, ids, geoms = _validate_geometry_input(
coordinates, ids, _ = _validate_geometry_input(
coordinates, ids=ids, valid_geometry_types=_VALID_GEOMETRY_TYPES
)

dt = _Delaunay(coordinates)
edges = _edges_from_simplices(dt.simplices)
edges = (
pandas.DataFrame(numpy.asarray(list(edges)))
.sort_values([0, 1])
.drop_duplicates()
.values
)
edges, _ = _voronoi_edges(coordinates)

ids = numpy.asarray(ids)
head, tail = ids[edges[:, 0]], ids[edges[:, 1]]

distances = ((coordinates[head] - coordinates[tail]) ** 2).sum(
axis=1
).squeeze() ** 0.5
weights = _kernel_functions[kernel](distances, bandwidth)
# check for coincident points which result in
if bandwidth is numpy.inf and kernel == "boxcar":
weights = numpy.ones(head.shape, dtype=numpy.int8)
else:
distances = ((coordinates[edges[:, 0]] - coordinates[edges[:, 1]]) ** 2).sum(
axis=1
).squeeze() ** 0.5
weights = _kernel_functions[kernel](distances, bandwidth)

# TODO: check for coincident points which result in
# dropped points from the triangulation and the
# misalignment of the weights and the attribute array

return Graph.from_arrays(head, tail, weights)
return head, tail, weights


def gabriel(coordinates, ids=None, bandwidth=numpy.inf, kernel="boxcar"):
def _gabriel(coordinates, ids=None, bandwidth=numpy.inf, kernel="boxcar"):
"""
Constructs the Gabriel graph of a set of points. This graph is a subset of
the Delaunay triangulation where only "short" links are retained. This
Expand Down Expand Up @@ -146,14 +141,14 @@ def gabriel(coordinates, ids=None, bandwidth=numpy.inf, kernel="boxcar"):
function for more details.
"""
try:
from numba import njit
from numba import njit # noqa E401
except ModuleNotFoundError:
warnings.warn(
"The numba package is used extensively in this module"
" to accelerate the computation of graphs. Without numba,"
" these computations may become unduly slow on large data."
)
coordinates, ids, geoms = _validate_geometry_input(
coordinates, ids, _ = _validate_geometry_input(
coordinates, ids=ids, valid_geometry_types=_VALID_GEOMETRY_TYPES
)

Expand All @@ -166,14 +161,22 @@ def gabriel(coordinates, ids=None, bandwidth=numpy.inf, kernel="boxcar"):
ids = numpy.asarray(ids)
head, tail = ids[output[:, 0]], ids[output[:, 1]]

distances = ((coordinates[head] - coordinates[tail]) ** 2).sum(
axis=1
).squeeze() ** 0.5
weights = _kernel_functions[kernel](distances, bandwidth)
return Graph.from_arrays(head, tail, weights)
if bandwidth is numpy.inf and kernel == "boxcar":
weights = numpy.ones(head.shape, dtype=numpy.int8)
else:
distances = ((coordinates[output[:, 0]] - coordinates[output[:, 1]]) ** 2).sum(
axis=1
).squeeze() ** 0.5
weights = _kernel_functions[kernel](distances, bandwidth)

# TODO: check for coincident points which result in
# dropped points from the triangulation and the
# misalignment of the weights and the attribute array

return head, tail, weights

def relative_neighborhood(coordinates, ids=None, bandwidth=numpy.inf, kernel="boxcar"):

def _relative_neighborhood(coordinates, ids=None, bandwidth=numpy.inf, kernel="boxcar"):
"""
Constructs the Relative Neighborhood graph from a set of points.
This graph is a subset of the Delaunay triangulation, where only
Expand Down Expand Up @@ -209,32 +212,33 @@ def relative_neighborhood(coordinates, ids=None, bandwidth=numpy.inf, kernel="bo
function for more details.
"""
try:
from numba import njit
from numba import njit # noqa E401
except ModuleNotFoundError:
warnings.warn(
"The numba package is used extensively in this module"
" to accelerate the computation of graphs. Without numba,"
" these computations may become unduly slow on large data."
)
coordinates, ids, geoms = _validate_geometry_input(
coordinates, ids, _ = _validate_geometry_input(
coordinates, ids=ids, valid_geometry_types=_VALID_GEOMETRY_TYPES
)

edges, dt = _voronoi_edges(coordinates)
output, dkmax = _filter_relativehood(edges, dt.points, return_dkmax=False)
output, _ = _filter_relativehood(edges, dt.points, return_dkmax=False)

head_ix, tail_ix, distance = zip(*output)

head, tail, distance = zip(*output)
head, tail = ids[numpy.asarray(head_ix)], ids[numpy.asarray(tail_ix)]

if bandwidth is numpy.inf and kernel == "boxcar":
weights = numpy.ones(head.shape, dtype=numpy.int8)
else:
weights = _kernel_functions[kernel](numpy.array(distance), bandwidth)

weight = _kernel_functions[kernel](numpy.array(distance), bandwidth)
return Graph.from_arrays(head, tail, weight)
return head, tail, weights


def voronoi(
coordinates,
ids=None,
clip="extent",
contiguity_type="v"
):
def _voronoi(coordinates, ids=None, clip="extent", rook=True):
"""
Compute contiguity weights according to a clipped
Voronoi diagram.
Expand All @@ -254,20 +258,25 @@ def voronoi(
to set the indices separately from your input data. Otherwise, use
data.set_index(ids) to ensure ordering is respected. If None, then the index
clip : str (default: 'extent')
An overloaded option about how to clip the voronoi cells passed to cg.voronoi_frames()
An overloaded option about how to clip the voronoi cells passed to
``libpysal.cg.voronoi_frames()``.
Default is ``'extent'``. Options are as follows.

* ``'none'``/``None`` -- No clip is applied. Voronoi cells may be arbitrarily larger that the source map. Note that this may lead to cells that are many orders of magnitude larger in extent than the original map. Not recommended.
* ``'bbox'``/``'extent'``/``'bounding box'`` -- Clip the voronoi cells to the bounding box of the input points.
* ``'chull``/``'convex hull'`` -- Clip the voronoi cells to the convex hull of the input points.
* ``'ashape'``/``'ahull'`` -- Clip the voronoi cells to the tightest hull that contains all points (e.g. the smallest alphashape, using ``libpysal.cg.alpha_shape_auto``).
* ``'none'``/``None`` -- No clip is applied. Voronoi cells may be arbitrarily
larger that the source map. Note that this may lead to cells that are many
orders of magnitude larger in extent than the original map. Not recommended.
* ``'bbox'``/``'extent'``/``'bounding box'`` -- Clip the voronoi cells to the
bounding box of the input points.
* ``'chull'``/``'convex hull'`` -- Clip the voronoi cells to the convex hull of
the input points.
* ``'ashape'``/``'ahull'`` -- Clip the voronoi cells to the tightest hull that
contains all points (e.g. the smallest alphashape, using
``libpysal.cg.alpha_shape_auto``).
* Polygon -- Clip to an arbitrary Polygon.
contiguity_type : str (default: 'v')
What kind of contiguity to apply to the voronoi diagram. There are three
recognized options:
1. "v" (Default): use vertex_set_contiguity()
2. "r": use rook() contiguity
3. "q": use queen() contiguity (not recommended)
rook : bool, optional
Contiguity method. If True, two geometries are considered neighbours if they
share at least one edge. If False, two geometries are considered neighbours
if they share at least one vertex. By default True.

Notes
-----
Expand All @@ -281,27 +290,12 @@ def voronoi(
delaunay triangulations in many applied contexts and
generally will remove "long" links in the delaunay graph.
"""
coordinates, ids, geoms = _validate_geometry_input(
coordinates, ids, _ = _validate_geometry_input(
coordinates, ids=ids, valid_geometry_types=_VALID_GEOMETRY_TYPES
)

cells, generators = voronoi_frames(coordinates, clip=clip)
if contiguity_type == "v":
w = _vertex_set_intersection(cells, ids=ids)
elif contiguity_type == "q":
w = _queen(cells, ids=ids)
elif contiguity_type == "r":
w = _rook(cells, ids=ids)
else:
raise ValueError(
f"Contiguity type {contiguity_type} not understood. Supported options "
"are 'v', 'q', and 'r'"
)

head = w[0]
tail = w[1]
weight = w[2]
return Graph.from_arrays(head, tail, weight)
cells, _ = voronoi_frames(coordinates, clip=clip)
return _vertex_set_intersection(cells, rook=rook, ids=ids)


#### utilities
Expand All @@ -316,8 +310,6 @@ def _edges_from_simplices(simplices):
that are then converted into the six non-directed edges for
each simplex.
"""
dupes = {}

edges = []
for simplex in simplices:
edges.append((simplex[0], simplex[1]))
Expand Down Expand Up @@ -420,13 +412,12 @@ def _filter_relativehood(edges, coordinates, return_dkmax=False):


def _voronoi_edges(coordinates):
    """Build the de-duplicated Delaunay edge array for a set of points.

    Triangulates ``coordinates``, expands each simplex into its
    non-directed edges via ``_edges_from_simplices``, and removes
    duplicate edges so each link appears once.

    Parameters
    ----------
    coordinates : numpy.ndarray
        Array of point coordinates accepted by ``scipy.spatial.Delaunay``.

    Returns
    -------
    tuple
        ``(edges, dt)`` where ``edges`` is an (m, 2) integer array of
        sorted, de-duplicated vertex-index pairs and ``dt`` is the
        ``scipy.spatial.Delaunay`` triangulation object they came from.
    """
    dt = spatial.Delaunay(coordinates)
    edges = _edges_from_simplices(dt.simplices)
    # sort + drop_duplicates collapses the repeated edges shared by
    # adjacent simplices into a unique edge list
    edges = (
        pandas.DataFrame(numpy.asarray(list(edges)))
        .sort_values([0, 1])
        .drop_duplicates()
        .values
    )
    return edges, dt
Loading