Skip to content

Commit

Permalink
special case of geom column comparisons
Browse files Browse the repository at this point in the history
  • Loading branch information
fvankrieken committed Nov 13, 2024
1 parent 53136c5 commit 70dd7fc
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 6 deletions.
39 changes: 35 additions & 4 deletions dcpy/data/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def compare_sql_keyed_rows(

comps: dict[str, pd.DataFrame] = {}

def query(column):
def query(column: str) -> str:

Check warning on line 134 in dcpy/data/compare.py

View check run for this annotation

Codecov / codecov/patch

dcpy/data/compare.py#L134

Added line #L134 was not covered by tests
lc = f'"left"."{column}"'
rc = f'"right"."{column}"'
return f"""
Expand All @@ -143,10 +143,41 @@ def query(column):
WHERE {lc} IS DISTINCT FROM {rc}
"""

def spatial_query(column: str) -> str:
lc = f'"left"."{column}"'
rc = f'"right"."{column}"'
return f"""

Check warning on line 149 in dcpy/data/compare.py

View check run for this annotation

Codecov / codecov/patch

dcpy/data/compare.py#L146-L149

Added lines #L146 - L149 were not covered by tests
SELECT
{left_keys},
st_orderingequals({lc}, {rc}) AS "ordering_equal",
st_equals({lc}, {rc}) AS "spatially_equal"
FROM {left} AS "left"
INNER JOIN {right} AS "right"
ON {on}
WHERE {lc} IS DISTINCT FROM {rc}
"""

left_geom_columns = client.get_geometry_columns(left)
right_geom_columns = client.get_geometry_columns(right)

Check warning on line 161 in dcpy/data/compare.py

View check run for this annotation

Codecov / codecov/patch

dcpy/data/compare.py#L160-L161

Added lines #L160 - L161 were not covered by tests

for column in non_key_columns:
comp_df = client.execute_select_query(query(column))
comp_df = comp_df.set_index(key_columns)
comp_df.columns = pd.Index(["left", "right"])
# simple inequality is not informative for spatial columns
if (column in left_geom_columns) and (column in right_geom_columns):
comp_df = client.execute_select_query(spatial_query(column))
comp_df = comp_df.set_index(key_columns)
comp_df.columns = pd.Index(["ordering_equal", "spatially_equal"])

Check warning on line 168 in dcpy/data/compare.py

View check run for this annotation

Codecov / codecov/patch

dcpy/data/compare.py#L166-L168

Added lines #L166 - L168 were not covered by tests

elif (column not in left_geom_columns) and (column not in right_geom_columns):
comp_df = client.execute_select_query(query(column))
comp_df = comp_df.set_index(key_columns)
comp_df.columns = pd.Index(["left", "right"])

Check warning on line 173 in dcpy/data/compare.py

View check run for this annotation

Codecov / codecov/patch

dcpy/data/compare.py#L171-L173

Added lines #L171 - L173 were not covered by tests

# No point comparing geom and non-geom.
# This should be caught in `column_comparison` of report
# Other non-equivalent types are allowed - text vs varchar can produce valid comps
else:
continue

Check warning on line 179 in dcpy/data/compare.py

View check run for this annotation

Codecov / codecov/patch

dcpy/data/compare.py#L179

Added line #L179 was not covered by tests

if len(comp_df) > 0:
comps[column] = comp_df.copy()

Expand Down
27 changes: 25 additions & 2 deletions dcpy/utils/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,15 +222,38 @@ def get_table_columns(self, table_name: str) -> list[str]:
def get_column_types(self, table_name: str) -> dict[str, str]:
    """Map each column of ``table_name`` to its type name.

    Looks the table up in ``information_schema.columns`` within
    ``self.schema``. For columns whose ``data_type`` is reported as
    ``USER-DEFINED`` the underlying ``udt_name`` is returned instead, so
    that distinct user-defined types are not all collapsed into the same
    uninformative label.

    Args:
        table_name: Name of the table to inspect.

    Returns:
        A dict of ``column_name -> data_type`` (or ``udt_name`` for
        user-defined types). Empty when the query returns no rows.
    """
    # NOTE(review): execute_select_query is assumed to return a pandas
    # DataFrame with one row per column - confirm against its definition.
    columns = self.execute_select_query(
        """
        SELECT
            column_name,
            CASE
                WHEN data_type = 'USER-DEFINED' THEN udt_name
                ELSE data_type
            END AS data_type
        FROM information_schema.columns
        WHERE table_schema = ':table_schema'
            AND table_name = ':table_name';
        """,
        table_schema=AsIs(self.schema),
        table_name=AsIs(table_name),
    )
    # Pair the two result columns directly rather than iterating row by row.
    return dict(zip(columns["column_name"], columns["data_type"]))

def get_geometry_columns(self, table_name: str) -> set[str]:
    """Return the names of the geometry columns of ``table_name``.

    A column counts as a geometry column when
    ``information_schema.columns`` reports it, within ``self.schema``,
    with ``data_type = 'USER-DEFINED'`` and ``udt_name = 'geometry'``.

    Args:
        table_name: Name of the table to inspect.

    Returns:
        The set of matching column names; empty when there are none.
    """
    # NOTE(review): execute_select_query is assumed to return a pandas
    # DataFrame - confirm against its definition.
    columns = self.execute_select_query(
        """
        SELECT
            column_name
        FROM information_schema.columns
        WHERE
            table_schema = ':table_schema'
            AND table_name = ':table_name'
            AND data_type = 'USER-DEFINED'
            AND udt_name = 'geometry';
        """,
        table_schema=AsIs(self.schema),
        table_name=AsIs(table_name),
    )
    # Materialise the values into a real set. Returning the Series itself
    # would make `name in result` test the integer index labels instead of
    # the column names, so membership checks by callers would never match.
    return set(columns["column_name"])

Check warning on line 255 in dcpy/utils/postgres.py

View check run for this annotation

Codecov / codecov/patch

dcpy/utils/postgres.py#L255

Added line #L255 was not covered by tests

def add_pk(self, table: str, id_column: str = "id"):
self.execute_query(
'ALTER TABLE ":schema".":table" ADD COLUMN ":id_column" SERIAL CONSTRAINT ":constraint" PRIMARY KEY;',
Expand Down

0 comments on commit 70dd7fc

Please sign in to comment.