Skip to content

Commit 70dd7fc

Browse files
committed
special case of geom column comparisons
1 parent 53136c5 commit 70dd7fc

File tree

2 files changed

+60
-6
lines changed

2 files changed

+60
-6
lines changed

dcpy/data/compare.py

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ def compare_sql_keyed_rows(
131131

132132
comps: dict[str, pd.DataFrame] = {}
133133

134-
def query(column):
134+
def query(column: str) -> str:
135135
lc = f'"left"."{column}"'
136136
rc = f'"right"."{column}"'
137137
return f"""
@@ -143,10 +143,41 @@ def query(column):
143143
WHERE {lc} IS DISTINCT FROM {rc}
144144
"""
145145

146+
def spatial_query(column: str) -> str:
    """Build the SQL that compares one geometry column across both tables.

    The statement joins the two tables and keeps only rows where the
    column ``IS DISTINCT FROM`` its counterpart. For each such row it
    selects the key columns plus two flags, ``ordering_equal``
    (``st_orderingequals``) and ``spatially_equal`` (``st_equals``).

    ``left_keys``, ``left``, ``right`` and ``on`` are read from the
    enclosing scope; they are not defined in this block.

    Args:
        column: Name of the column to compare; it is interpolated into
            the SQL as a double-quoted identifier on both sides.

    Returns:
        The SQL text of the comparison query.
    """
    # Qualify the column once per side of the join.
    left_col, right_col = (f'"{side}"."{column}"' for side in ("left", "right"))
    return f"""
        SELECT
            {left_keys},
            st_orderingequals({left_col}, {right_col}) AS "ordering_equal",
            st_equals({left_col}, {right_col}) AS "spatially_equal"
        FROM {left} AS "left"
        INNER JOIN {right} AS "right"
            ON {on}
        WHERE {left_col} IS DISTINCT FROM {right_col}
    """
159+
160+
left_geom_columns = client.get_geometry_columns(left)
161+
right_geom_columns = client.get_geometry_columns(right)
162+
146163
for column in non_key_columns:
147-
comp_df = client.execute_select_query(query(column))
148-
comp_df = comp_df.set_index(key_columns)
149-
comp_df.columns = pd.Index(["left", "right"])
164+
# simple inequality is not informative for spatial columns
165+
if (column in left_geom_columns) and (column in right_geom_columns):
166+
comp_df = client.execute_select_query(spatial_query(column))
167+
comp_df = comp_df.set_index(key_columns)
168+
comp_df.columns = pd.Index(["ordering_equal", "spatially_equal"])
169+
170+
elif (column not in left_geom_columns) and (column not in right_geom_columns):
171+
comp_df = client.execute_select_query(query(column))
172+
comp_df = comp_df.set_index(key_columns)
173+
comp_df.columns = pd.Index(["left", "right"])
174+
175+
# No point comparing geom and non-geom.
176+
# This should be caught in `column_comparison` of report
177+
# Other non-equivalent types are allowed - text vs varchar can produce valid comps
178+
else:
179+
continue
180+
150181
if len(comp_df) > 0:
151182
comps[column] = comp_df.copy()
152183

dcpy/utils/postgres.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,15 +222,38 @@ def get_table_columns(self, table_name: str) -> list[str]:
222222
def get_column_types(self, table_name: str) -> dict[str, str]:
    """Return a mapping of column name to data type for ``table_name``.

    The lookup reads ``information_schema.columns`` filtered by
    ``self.schema`` and the given table. When a column's ``data_type``
    is ``'USER-DEFINED'``, its ``udt_name`` is reported instead.

    Args:
        table_name: Table whose columns are inspected.

    Returns:
        A dict keyed by column name whose values are the type strings
        produced by the query.
    """
    type_query = """
        SELECT
            column_name,
            CASE
                WHEN data_type = 'USER-DEFINED' THEN udt_name
                ELSE data_type
            END AS data_type
        FROM information_schema.columns
        WHERE table_schema = ':table_schema'
        AND table_name = ':table_name';
    """
    rows = self.execute_select_query(
        type_query,
        table_schema=AsIs(self.schema),
        table_name=AsIs(table_name),
    )
    column_types: dict[str, str] = {}
    for _, row in rows.iterrows():
        column_types[row["column_name"]] = row["data_type"]
    return column_types
233239

240+
def get_geometry_columns(self, table_name: str) -> set[str]:
    """Return the names of the geometry columns of ``table_name``.

    A column is selected when ``information_schema.columns`` reports it
    with ``data_type = 'USER-DEFINED'`` and ``udt_name = 'geometry'``
    in ``self.schema``.

    Args:
        table_name: Table whose columns are inspected.

    Returns:
        A set of column names; empty when the query returns no rows.
    """
    columns = self.execute_select_query(
        """
        SELECT
            column_name
        FROM information_schema.columns
        WHERE
            table_schema = ':table_schema'
            AND table_name = ':table_name'
            AND data_type = 'USER-DEFINED'
            AND udt_name = 'geometry';
        """,
        table_schema=AsIs(self.schema),
        table_name=AsIs(table_name),
    )
    # A table without geometry columns yields an empty frame.
    # NOTE(review): assumes execute_select_query returns a DataFrame, as
    # get_column_types does when it calls .iterrows() - confirm.
    if columns.empty:
        return set()
    # Return a real set of the values, not the Series itself:
    # `name in series` tests the index labels (0, 1, ...), not the
    # values, so membership checks against the Series are always False
    # for column names.
    return set(columns["column_name"])
256+
234257
def add_pk(self, table: str, id_column: str = "id"):
235258
self.execute_query(
236259
'ALTER TABLE ":schema".":table" ADD COLUMN ":id_column" SERIAL CONSTRAINT ":constraint" PRIMARY KEY;',

0 commit comments

Comments
 (0)