diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py
index 301207bb31..4a1ca4ca8c 100644
--- a/bigframes/bigquery/__init__.py
+++ b/bigframes/bigquery/__init__.py
@@ -32,6 +32,7 @@
     st_difference,
     st_distance,
     st_intersection,
+    st_length,
 )
 from bigframes.bigquery._operations.json import (
     json_extract,
@@ -58,6 +59,7 @@
     "st_difference",
     "st_distance",
     "st_intersection",
+    "st_length",
     # json ops
     "json_extract",
     "json_extract_array",
diff --git a/bigframes/bigquery/_operations/geo.py b/bigframes/bigquery/_operations/geo.py
index fc9bd1a653..20818d28d6 100644
--- a/bigframes/bigquery/_operations/geo.py
+++ b/bigframes/bigquery/_operations/geo.py
@@ -380,3 +380,67 @@ def st_intersection(
         each aligned geometry with other.
     """
     return series._apply_binary_op(other, ops.geo_st_intersection_op)
+
+
+def st_length(
+    series: Union[bigframes.series.Series, bigframes.geopandas.GeoSeries],
+    *,
+    use_spheroid: bool = False,
+) -> bigframes.series.Series:
+    """Returns the total length in meters of the lines in the input GEOGRAPHY.
+
+    If a series element is a point or a polygon, returns zero for that row.
+    If a series element is a collection, returns the length of the lines
+    in the collection; if the collection doesn't contain lines, returns
+    zero.
+
+    The optional use_spheroid parameter determines how this function
+    measures distance. If use_spheroid is FALSE, the function measures
+    distance on the surface of a perfect sphere.
+
+    The use_spheroid parameter currently only supports the value FALSE. The
+    default value of use_spheroid is FALSE. See:
+    https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_length
+
+    **Examples:**
+
+        >>> import bigframes.geopandas
+        >>> import bigframes.pandas as bpd
+        >>> import bigframes.bigquery as bbq
+        >>> from shapely.geometry import Polygon, LineString, Point, GeometryCollection
+        >>> bpd.options.display.progress_bar = None
+
+        >>> series = bigframes.geopandas.GeoSeries(
+        ...         [
+        ...             LineString([(0, 0), (1, 0)]),  # Length will be approx 1 degree in meters
+        ...             Polygon([(0.0, 0.0), (0.1, 0.1), (0.0, 0.1)]),  # Length is 0
+        ...             Point(0, 1),  # Length is 0
+        ...             GeometryCollection([LineString([(0,0),(0,1)]), Point(1,1)])  # Length of LineString only
+        ...         ]
+        ... )
+
+    Default behavior (use_spheroid=False):
+
+        >>> result = bbq.st_length(series)
+        >>> result
+        0    111195.101177
+        1              0.0
+        2              0.0
+        3    111195.101177
+        dtype: Float64
+
+    Args:
+        series (bigframes.series.Series | bigframes.geopandas.GeoSeries):
+            A series containing geography objects.
+        use_spheroid (bool, optional):
+            Determines how this function measures distance.
+            If FALSE (default), measures distance on a perfect sphere.
+            Currently, only FALSE is supported.
+
+    Returns:
+        bigframes.series.Series:
+            Series of floats representing the lengths in meters.
+    """
+    series = series._apply_unary_op(ops.GeoStLengthOp(use_spheroid=use_spheroid))
+    series.name = None
+    return series
diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
index 7707f16dad..15495c38cb 100644
--- a/bigframes/core/compile/scalar_op_compiler.py
+++ b/bigframes/core/compile/scalar_op_compiler.py
@@ -30,7 +30,6 @@
 import bigframes.core.compile.default_ordering
 import bigframes.core.compile.ibis_types
 import bigframes.core.expression as ex
-import bigframes.dtypes
 import bigframes.operations as ops
 
 _ZERO = typing.cast(ibis_types.NumericValue, ibis_types.literal(0))
@@ -1079,6 +1078,12 @@ def geo_x_op_impl(x: ibis_types.Value):
     return typing.cast(ibis_types.GeoSpatialValue, x).x()
 
 
+@scalar_op_compiler.register_unary_op(ops.GeoStLengthOp, pass_op=True)
+def geo_length_op_impl(x: ibis_types.Value, op: ops.GeoStLengthOp):
+    # Delegate to the st_length builtin declared later in this module, forwarding op.use_spheroid.
+    return st_length(x, op.use_spheroid)
+
+
 @scalar_op_compiler.register_unary_op(ops.geo_y_op)
 def geo_y_op_impl(x: ibis_types.Value):
     return typing.cast(ibis_types.GeoSpatialValue, x).y()
@@ -2057,6 +2062,12 @@ def st_distance(a: ibis_dtypes.geography, b: ibis_dtypes.geography, use_spheroid
     """Convert string to geography."""
 
 
+@ibis_udf.scalar.builtin
+def st_length(geog: ibis_dtypes.geography, use_spheroid: bool) -> ibis_dtypes.float:  # type: ignore
+    """ST_LENGTH BQ builtin. This body is never executed."""
+    pass
+
+
 @ibis_udf.scalar.builtin
 def unix_micros(a: ibis_dtypes.timestamp) -> int:  # type: ignore
     """Convert a timestamp to microseconds"""
diff --git a/bigframes/geopandas/geoseries.py b/bigframes/geopandas/geoseries.py
index 38ebda7d92..b302f42a13 100644
--- a/bigframes/geopandas/geoseries.py
+++ b/bigframes/geopandas/geoseries.py
@@ -30,6 +30,13 @@ def __init__(self, data=None, index=None, **kwargs):
             data=data, index=index, dtype=geopandas.array.GeometryDtype(), **kwargs
         )
 
+    @property
+    def length(self) -> bigframes.series.Series:
+        """Always raises NotImplementedError, pointing callers at bigframes.bigquery.st_length."""
+        raise NotImplementedError(
+            "GeoSeries.length is not yet implemented. Please use bigframes.bigquery.st_length(geoseries) instead."
+        )
+
     @property
     def x(self) -> bigframes.series.Series:
         series = self._apply_unary_op(ops.geo_x_op)
diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py
index 3e97ec6f4a..83ca79caec 100644
--- a/bigframes/operations/__init__.py
+++ b/bigframes/operations/__init__.py
@@ -101,6 +101,7 @@
     geo_x_op,
     geo_y_op,
     GeoStDistanceOp,
+    GeoStLengthOp,
 )
 from bigframes.operations.json_ops import (
     JSONExtract,
@@ -385,6 +386,7 @@
     "geo_st_geogfromtext_op",
     "geo_st_geogpoint_op",
     "geo_st_intersection_op",
+    "GeoStLengthOp",
     "geo_x_op",
     "geo_y_op",
     "GeoStDistanceOp",
diff --git a/bigframes/operations/geo_ops.py b/bigframes/operations/geo_ops.py
index 98da9099cd..b7dd704626 100644
--- a/bigframes/operations/geo_ops.py
+++ b/bigframes/operations/geo_ops.py
@@ -80,3 +80,13 @@ class GeoStDistanceOp(base_ops.BinaryOp):
 
     def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
         return dtypes.FLOAT_DTYPE
+
+
+@dataclasses.dataclass(frozen=True)
+class GeoStLengthOp(base_ops.UnaryOp):
+    """Unary geography length op; ``use_spheroid`` is forwarded to the compiled st_length call."""
+    name = "geo_st_length"
+    use_spheroid: bool = False
+
+    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
+        return dtypes.FLOAT_DTYPE
diff --git a/tests/system/small/bigquery/test_geo.py b/tests/system/small/bigquery/test_geo.py
index be517fb5cc..75217adc59 100644
--- a/tests/system/small/bigquery/test_geo.py
+++ b/tests/system/small/bigquery/test_geo.py
@@ -19,10 +19,13 @@
 from shapely.geometry import (  # type: ignore
     GeometryCollection,
     LineString,
+    MultiLineString,
+    MultiPoint,
+    MultiPolygon,
     Point,
     Polygon,
 )
 
 import bigframes.bigquery as bbq
 import bigframes.geopandas
 
@@ -59,6 +62,67 @@ def test_geo_st_area():
     )
 
 
+# Expected length for 1 degree of longitude at the equator is approx 111195.079734 meters
+DEG_LNG_EQUATOR_METERS = 111195.07973400292
+
+
+def test_st_length_various_geometries(session):
+    """Check st_length for each geometry kind: lines have length, other shapes are 0, NULL stays NULL."""
+    input_geometries = [
+        Point(0, 0),
+        LineString([(0, 0), (1, 0)]),
+        Polygon([(0, 0), (1, 0), (0, 1), (0, 0)]),
+        MultiPoint([Point(0, 0), Point(1, 1)]),
+        MultiLineString([LineString([(0, 0), (1, 0)]), LineString([(0, 0), (0, 1)])]),
+        MultiPolygon(
+            [
+                Polygon([(0, 0), (1, 0), (0, 1), (0, 0)]),
+                Polygon([(2, 2), (3, 2), (2, 3), (2, 2)]),
+            ]
+        ),
+        GeometryCollection([Point(0, 0), LineString([(0, 0), (1, 0)])]),
+        GeometryCollection([]),
+        None,  # Represents NULL geography input
+        GeometryCollection([Point(1, 1), Point(2, 2)]),
+    ]
+    geoseries = bigframes.geopandas.GeoSeries(input_geometries, session=session)
+
+    expected_lengths = pd.Series(
+        [
+            0.0,  # Point
+            DEG_LNG_EQUATOR_METERS,  # LineString
+            0.0,  # Polygon
+            0.0,  # MultiPoint
+            2 * DEG_LNG_EQUATOR_METERS,  # MultiLineString
+            0.0,  # MultiPolygon
+            DEG_LNG_EQUATOR_METERS,  # GeometryCollection (Point + LineString)
+            0.0,  # Empty GeometryCollection
+            pd.NA,  # None input for ST_LENGTH(NULL) is NULL
+            0.0,  # GeometryCollection (Point + Point)
+        ],
+        index=pd.Index(range(10), dtype="Int64"),
+        dtype="Float64",
+    )
+
+    # Test default use_spheroid
+    result_default = bbq.st_length(geoseries).to_pandas()
+    pd.testing.assert_series_equal(
+        result_default,
+        expected_lengths,
+        rtol=1e-3,
+        atol=1e-3,  # For comparisons involving 0.0
+    )  # type: ignore
+
+    # Test explicit use_spheroid=False
+    result_explicit_false = bbq.st_length(geoseries, use_spheroid=False).to_pandas()
+    pd.testing.assert_series_equal(
+        result_explicit_false,
+        expected_lengths,
+        rtol=1e-3,
+        atol=1e-3,  # For comparisons involving 0.0
+    )  # type: ignore
+
+
 def test_geo_st_difference_with_geometry_objects():
     data1 = [
         Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]),
diff --git a/tests/system/small/geopandas/test_geoseries.py b/tests/system/small/geopandas/test_geoseries.py
index ae99fd6fc2..36dd070ef5 100644
--- a/tests/system/small/geopandas/test_geoseries.py
+++ b/tests/system/small/geopandas/test_geoseries.py
@@ -96,6 +96,17 @@ def test_geo_area_not_supported():
         bf_series.area
 
 
+def test_geoseries_length_property_not_implemented(session):
+    gs = bigframes.geopandas.GeoSeries([Point(0, 0)], session=session)
+    with pytest.raises(
+        NotImplementedError,
+        match=re.escape(
+            "GeoSeries.length is not yet implemented. Please use bigframes.bigquery.st_length(geoseries) instead."
+        ),
+    ):
+        _ = gs.length
+
+
 def test_geo_distance_not_supported():
     s1 = bigframes.pandas.Series(
         [