|
4 | 4 | from pathlib import Path |
5 | 5 | from pydantic import TypeAdapter, BaseModel |
6 | 6 | import pytest |
| 7 | +from shapely import Polygon, MultiPolygon |
7 | 8 | import yaml |
8 | 9 | from unittest import TestCase, mock |
9 | 10 |
|
@@ -91,16 +92,24 @@ def test_to_parquet(file: dict, create_temp_filesystem: Path): |
91 | 92 |
|
92 | 93 | def test_validate_processing_steps(): |
93 | 94 | steps = [ |
94 | | - PreprocessingStep(name="no_arg_function"), |
| 95 | + PreprocessingStep(name="multi"), |
95 | 96 | PreprocessingStep(name="drop_columns", args={"columns": ["col1", "col2"]}), |
96 | 97 | ] |
97 | 98 | compiled_steps = transform.validate_processing_steps("test", steps) |
98 | 99 | assert len(compiled_steps) == 2 |
99 | 100 |
|
100 | | - df = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6], "col3": [7, 8, 9]}) |
| 101 | + df = gpd.GeoDataFrame( |
| 102 | + { |
| 103 | + "col1": [1, 2, 3], |
| 104 | + "col2": [4, 5, 6], |
| 105 | + "col3": gpd.GeoSeries([None, None, None]), |
| 106 | + } |
| 107 | + ).set_geometry("col3") |
101 | 108 | for step in compiled_steps: |
102 | 109 | df = step(df) |
103 | | - expected = pd.DataFrame({"col3": [7, 8, 9]}) |
| 110 | + expected = gpd.GeoDataFrame( |
| 111 | + {"col3": gpd.GeoSeries([None, None, None])} |
| 112 | + ).set_geometry("col3") |
104 | 113 | assert df.equals(expected) |
105 | 114 |
|
106 | 115 |
|
@@ -155,7 +164,7 @@ def test_invalid_function(self): |
155 | 164 |
|
156 | 165 | class TestPreprocessors(TestCase): |
157 | 166 | proc = transform.Preprocessor(TEST_DATASET_NAME) |
158 | | - gdf: gpd.GeoDataFrame = gpd.read_parquet(RESOURCES / TEST_DATA_DIR / "test.parquet") |
| 167 | + gdf = gpd.read_parquet(RESOURCES / TEST_DATA_DIR / "test.parquet") |
159 | 168 | basic_df = pd.DataFrame({"a": [2, 3, 1], "b": ["b_1", "b_2", "c_3"]}) |
160 | 169 | messy_names_df = pd.DataFrame({"Column": [1, 2], "Two_Words": [3, 4]}) |
161 | 170 | dupe_df = pd.DataFrame({"a": [1, 1, 1, 2], "b": [3, 1, 3, 2]}) |
@@ -287,6 +296,44 @@ def test_rename_geodataframe(self): |
287 | 296 | expected = gpd.read_parquet(RESOURCES / TEST_DATA_DIR / "renamed.parquet") |
288 | 297 | assert transformed.equals(expected) |
289 | 298 |
|
| 299 | + def test_multi(self): |
| 300 | + gdf = gpd.GeoDataFrame( |
| 301 | + { |
| 302 | + "a": [1, 2, 3], |
| 303 | + "wkt": gpd.GeoSeries( |
| 304 | + [ |
| 305 | + None, |
| 306 | + Polygon([(0, 0), (0, 1), (1, 0), (0, 0)]), |
| 307 | + MultiPolygon( |
| 308 | + [ |
| 309 | + Polygon([(0, 0), (0, 1), (1, 0), (0, 0)]), |
| 310 | + Polygon([(0, 0), (0, -1), (-1, 0), (0, 0)]), |
| 311 | + ] |
| 312 | + ), |
| 313 | + ] |
| 314 | + ), |
| 315 | + } |
| 316 | + ).set_geometry("wkt") |
| 317 | + transformed = self.proc.multi(gdf) |
| 318 | + expected = gpd.GeoDataFrame( |
| 319 | + { |
| 320 | + "a": [1, 2, 3], |
| 321 | + "wkt": gpd.GeoSeries( |
| 322 | + [ |
| 323 | + None, |
| 324 | + MultiPolygon([Polygon([(0, 0), (0, 1), (1, 0), (0, 0)])]), |
| 325 | + MultiPolygon( |
| 326 | + [ |
| 327 | + Polygon([(0, 0), (0, 1), (1, 0), (0, 0)]), |
| 328 | + Polygon([(0, 0), (0, -1), (-1, 0), (0, 0)]), |
| 329 | + ] |
| 330 | + ), |
| 331 | + ] |
| 332 | + ), |
| 333 | + } |
| 334 | + ) |
| 335 | + assert transformed.equals(expected) |
| 336 | + |
290 | 337 |
|
291 | 338 | def test_preprocess_no_steps(create_temp_filesystem: Path): |
292 | 339 | input = create_temp_filesystem / "input.txt" |
|
0 commit comments