Skip to content

Commit 43892f8

Browse files
authored
Update validator with the latest spec changes (#70)
1 parent 197f3a2 commit 43892f8

File tree

6 files changed

+69
-44
lines changed

6 files changed

+69
-44
lines changed

.github/workflows/scripts.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,28 @@ on:
77
pull_request:
88

99
jobs:
10+
validate-examples:
11+
runs-on: ubuntu-latest
12+
steps:
13+
- uses: actions/checkout@v2
14+
15+
- name: Set up Python 3.8
16+
uses: actions/setup-python@v2
17+
with:
18+
python-version: 3.8
19+
20+
- name: Install validator
21+
run: |
22+
cd validator/python
23+
python -m pip install --no-binary geoparquet_validator .
24+
25+
- name: Run validator
26+
run: |
27+
for example in $(ls examples/*.parquet); do
28+
echo $example;
29+
geoparquet_validator $example || exit 1;
30+
done
31+
1032
test-json-metadata:
1133
runs-on: ubuntu-latest
1234
steps:

examples/example.parquet

110 Bytes
Binary file not shown.

examples/example.py

Lines changed: 3 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,5 @@
11
"""
22
Generates `example.parquet` using pyarrow by running `python example.py`.
3-
4-
You can print the metadata with:
5-
6-
.. code-block:: python
7-
8-
>>> import json, pprint, pyarrow.parquet as pq
9-
>>> pprint.pprint(json.loads(pq.read_schema("example.parquet").metadata[b"geo"]))
10-
{'columns': {'geometry': {'bbox': [-180.0, -90.0, 180.0, 83.6451],
11-
'crs': 'GEOGCRS["WGS 84 (CRS84)",ENSEMBLE["World '
12-
'Geodetic System 1984 ensemble",MEMBER["World '
13-
'Geodetic System 1984 '
14-
'(Transit)"],MEMBER["World Geodetic System '
15-
'1984 (G730)"],MEMBER["World Geodetic System '
16-
'1984 (G873)"],MEMBER["World Geodetic System '
17-
'1984 (G1150)"],MEMBER["World Geodetic System '
18-
'1984 (G1674)"],MEMBER["World Geodetic System '
19-
'1984 (G1762)"],MEMBER["World Geodetic System '
20-
'1984 (G2139)"],ELLIPSOID["WGS '
21-
'84",6378137,298.257223563],ENSEMBLEACCURACY[2.0]],CS[ellipsoidal,2],AXIS["geodetic '
22-
'longitude (Lon)",east],AXIS["geodetic '
23-
'latitude '
24-
'(Lat)",north],UNIT["degree",0.0174532925199433],USAGE[SCOPE["Not '
25-
'known."],AREA["World."],BBOX[-90,-180,90,180]],ID["OGC","CRS84"]]',
26-
'edges': 'planar',
27-
'encoding': 'WKB'}},
28-
'primary_column': 'geometry',
29-
'version': '0.1.0'}
303
"""
314
import json
325
import pathlib
@@ -39,7 +12,7 @@
3912
HERE = pathlib.Path(__file__).parent
4013

4114
df = geopandas.read_file(geopandas.datasets.get_path("naturalearth_lowres"))
42-
df = df.to_crs('crs84')
15+
df = df.to_crs("crs84")
4316
table = pa.Table.from_pandas(df.head().to_wkb())
4417

4518

@@ -48,8 +21,9 @@
4821
"primary_column": "geometry",
4922
"columns": {
5023
"geometry": {
51-
"crs": df.crs.to_wkt(pyproj.enums.WktVersion.WKT2_2019_SIMPLIFIED),
5224
"encoding": "WKB",
25+
"geometry_type": ["Polygon", "MultiPolygon"],
26+
"crs": df.crs.to_wkt(pyproj.enums.WktVersion.WKT2_2019_SIMPLIFIED),
5327
"edges": "planar",
5428
"bbox": [round(x, 4) for x in df.geometry.unary_union.bounds],
5529
},

examples/example_metadata.json

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,13 @@
88
180.0,
99
83.6451
1010
],
11-
"crs": "GEOGCRS[\"WGS 84\",ENSEMBLE[\"World Geodetic System 1984 ensemble\",MEMBER[\"World Geodetic System 1984 (Transit)\"],MEMBER[\"World Geodetic System 1984 (G730)\"],MEMBER[\"World Geodetic System 1984 (G873)\"],MEMBER[\"World Geodetic System 1984 (G1150)\"],MEMBER[\"World Geodetic System 1984 (G1674)\"],MEMBER[\"World Geodetic System 1984 (G1762)\"],MEMBER[\"World Geodetic System 1984 (G2139)\"],ELLIPSOID[\"WGS 84\",6378137,298.257223563],ENSEMBLEACCURACY[2.0]],CS[ellipsoidal,2],AXIS[\"geodetic latitude (Lat)\",north],AXIS[\"geodetic longitude (Lon)\",east],UNIT[\"degree\",0.0174532925199433],USAGE[SCOPE[\"Horizontal component of 3D system.\"],AREA[\"World.\"],BBOX[-90,-180,90,180]],ID[\"EPSG\",4326]]",
11+
"crs": "GEOGCRS[\"WGS 84 (CRS84)\",ENSEMBLE[\"World Geodetic System 1984 ensemble\",MEMBER[\"World Geodetic System 1984 (Transit)\"],MEMBER[\"World Geodetic System 1984 (G730)\"],MEMBER[\"World Geodetic System 1984 (G873)\"],MEMBER[\"World Geodetic System 1984 (G1150)\"],MEMBER[\"World Geodetic System 1984 (G1674)\"],MEMBER[\"World Geodetic System 1984 (G1762)\"],MEMBER[\"World Geodetic System 1984 (G2139)\"],ELLIPSOID[\"WGS 84\",6378137,298.257223563],ENSEMBLEACCURACY[2.0]],CS[ellipsoidal,2],AXIS[\"geodetic longitude (Lon)\",east],AXIS[\"geodetic latitude (Lat)\",north],UNIT[\"degree\",0.0174532925199433],USAGE[SCOPE[\"Not known.\"],AREA[\"World.\"],BBOX[-90,-180,90,180]],ID[\"OGC\",\"CRS84\"]]",
1212
"edges": "planar",
13-
"encoding": "WKB"
13+
"encoding": "WKB",
14+
"geometry_type": [
15+
"Polygon",
16+
"MultiPolygon"
17+
]
1418
}
1519
},
1620
"primary_column": "geometry",

format-specs/geoparquet.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ Each geometry column in the dataset must be included in the columns field above
5353
| Field Name | Type | Description |
5454
| ---------- | ----------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- |
5555
| encoding | string | **REQUIRED** Name of the geometry encoding format. Currently only 'WKB' is supported. |
56-
| geometry_type | string or \[string] | **REQUIRED** The geometry type(s) of all geometries, or "Unknown" if they are not known. |
56+
| geometry_type | string or \[string] | **REQUIRED** The geometry type(s) of all geometries, or 'Unknown' if they are not known. |
5757
| crs | string | **OPTIONAL** [WKT2](https://docs.opengeospatial.org/is/18-010r7/18-010r7.html) string representing the Coordinate Reference System (CRS) of the geometry. If the crs field is not included then the data in this column must be stored in longitude, latitude. In the case where a crs is not provided, CRS-aware implementations should assume a default value of [OGC:CRS84](https://www.opengis.net/def/crs/OGC/1.3/CRS84) (longitude-latitude coordinates) |
5858
| edges | string | **OPTIONAL** Name of the coordinate system for the edges. Must be one of 'planar' or 'spherical'. The default value is 'planar'. |
5959
| bbox | \[number] | **OPTIONAL** Bounding Box of the geometries in the file, formatted according to [RFC 7946, section 5](https://tools.ietf.org/html/rfc7946#section-5) |
@@ -62,7 +62,7 @@ Each geometry column in the dataset must be included in the columns field above
6262

6363
#### crs
6464

65-
The Coordinate Reference System (CRS) is an optional parameter for each geometry column defined in geoparquet format.
65+
The Coordinate Reference System (CRS) is an optional parameter for each geometry column defined in geoparquet format.
6666

6767
The CRS must be provided in [WKT](https://en.wikipedia.org/wiki/Well-known_text_representation_of_coordinate_reference_systems) version 2, also known as **WKT2**. WKT2 has several revisions; this specification only supports [WKT2_2019](https://docs.opengeospatial.org/is/18-010r7/18-010r7.html).
6868

@@ -98,7 +98,7 @@ GEOGCRS["WGS 84 (CRS84)",
9898
ID["OGC","CRS84"]]
9999
```
100100

101-
Due to the large number of CRSes available and the difficulty of implementing all of them, we expect that a number of implementations will start without support for the optional `crs` field.
101+
Due to the large number of CRSes available and the difficulty of implementing all of them, we expect that a number of implementations will start without support for the optional `crs` field.
102102
We recommend that users store their data in longitude, latitude (OGC:CRS84, or omitting the `crs` field) so that it works with the widest range of tools. However, data that is better served by a particular projection may use an alternate coordinate reference system. We expect many tools will support alternate CRSes, but encourage users to check that their chosen tool supports their chosen CRS.
103103

104104
#### epoch
@@ -122,9 +122,9 @@ Note that the current version of the spec only allows for a subset of WKB: 2D or
122122

123123
#### Coordinate axis order
124124

125-
The axis order of the coordinates in WKB stored in a geoparquet follows the de facto standard for axis order in WKB and is therefore always
126-
(x, y) where x is easting or longitude and y is northing or latitude. This ordering explicitly overrides the axis order as specified in the CRS.
127-
This follows the precedent of [GeoPackage](https://geopackage.org), see the [note in their spec](https://www.geopackage.org/spec130/#gpb_spec).
125+
The axis order of the coordinates in WKB stored in a geoparquet follows the de facto standard for axis order in WKB and is therefore always
126+
(x, y) where x is easting or longitude and y is northing or latitude. This ordering explicitly overrides the axis order as specified in the CRS.
127+
This follows the precedent of [GeoPackage](https://geopackage.org), see the [note in their spec](https://www.geopackage.org/spec130/#gpb_spec).
128128

129129
#### geometry_type
130130

validator/python/geoparquet_validator/schema.json

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"type": "object",
55
"description": "Parquet metadata included in the geo field.",
66
"properties": {
7-
"schema_version": {
7+
"version": {
88
"type": "string",
99
"pattern": "^0\\.1\\.[0-9]+$",
1010
"description": "The version of the geoparquet metadata standard used when writing."
@@ -20,15 +20,30 @@
2020
".*": {
2121
"type": "object",
2222
"properties": {
23-
"crs": {
24-
"type": "string",
25-
"description": "WKT2 representing the Coordinate Reference System (CRS) of the geometry."
26-
},
2723
"encoding": {
2824
"type": "string",
2925
"enum": ["WKB"],
3026
"description": "Name of the geometry encoding format. Currently only 'WKB' is supported."
3127
},
28+
"geometry_type": {
29+
"oneOf": [
30+
{
31+
"$ref": "#/$defs/geometry_type"
32+
},
33+
{
34+
"type": "array",
35+
"items": {
36+
"$ref": "#/$defs/geometry_type"
37+
},
38+
"uniqueItems": true
39+
}
40+
],
41+
"description": "The geometry type(s) of all geometries, or 'Unknown' if they are not known."
42+
},
43+
"crs": {
44+
"type": "string",
45+
"description": "WKT2 representing the Coordinate Reference System (CRS) of the geometry."
46+
},
3247
"edges": {
3348
"type": "string",
3449
"enum": ["planar", "spherical"],
@@ -55,14 +70,24 @@
5570
"description": "The maximum constant latitude line that bounds the rectangle (ymax)."
5671
}
5772
]
73+
},
74+
"epoch": {
75+
"type": "number",
76+
"description": "Coordinate epoch in case of a dynamic CRS, expressed as a decimal year."
5877
}
5978
},
6079
"additionalProperties": true,
61-
"required": ["crs", "encoding"]
80+
"required": ["encoding", "geometry_type"]
6281
}
6382
}
6483
}
6584
},
6685
"additionalProperties": true,
67-
"required": ["schema_version", "primary_column", "columns"]
86+
"required": ["version", "primary_column", "columns"],
87+
"$defs": {
88+
"geometry_type": {
89+
"type": "string",
90+
"enum": ["Point", "LineString", "Polygon", "MultiPoint", "MultiLineString", "MultiPolygon", "GeometryCollection", "Unknown"]
91+
}
92+
}
6893
}

0 commit comments

Comments
 (0)