Remove attribute-style accesses in util
luseverin committed Aug 18, 2024
1 parent 9c6c769 commit 3e990df
Showing 9 changed files with 68 additions and 19 deletions.
49 changes: 49 additions & 0 deletions check_attribute-style_access.py
@@ -0,0 +1,49 @@
import ast
import os
import sys

class DataFrameAttributeVisitor(ast.NodeVisitor):
    def __init__(self):
        self.dataframe_vars = set()
        self.accesses = []

    def visit_Assign(self, node):
        # Check if the assigned value is an instance of DataFrame or Series
        if isinstance(node.value, ast.Call) and isinstance(node.value.func, ast.Attribute):
            if (node.value.func.attr == 'DataFrame'
                    or node.value.func.attr == 'Series'
                    or node.value.func.attr == "GeoDataFrame"
                    or node.value.func.attr == "Dataset"
                    or node.value.func.attr == "DataArray"):
                for target in node.targets:
                    if isinstance(target, ast.Name):
                        self.dataframe_vars.add(target.id)
        self.generic_visit(node)

    def visit_Attribute(self, node):
        # Check if the attribute access is on a variable that is a DataFrame or Series
        if isinstance(node.value, ast.Name) and node.value.id in self.dataframe_vars:
            self.accesses.append((node.lineno, node.col_offset, node.value.id, node.attr))
        self.generic_visit(node)

def find_dataframe_attribute_accesses(directory):
    visitor = DataFrameAttributeVisitor()
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.py'):
                file_path = os.path.join(root, file)
                with open(file_path, 'r', encoding='utf-8') as f:
                    try:
                        tree = ast.parse(f.read(), filename=file_path)
                        visitor.visit(tree)
                        if visitor.accesses:
                            print(f'In file {file_path}:')
                            for lineno, col_offset, var_name, attr in visitor.accesses:
                                print(f'  Line {lineno}, Column {col_offset}: {var_name}.{attr}')
                            visitor.accesses = []  # Reset for the next file
                    except SyntaxError as e:
                        print(f'Syntax error in file {file_path}: {e}')

# Usage
path_to_check = sys.argv[1]
find_dataframe_attribute_accesses(path_to_check)
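
For context, a minimal, hypothetical snippet (not part of the commit) showing what the checker does and does not report: visit_Assign only records variables assigned directly from a DataFrame/Series/GeoDataFrame/Dataset/DataArray constructor, and visit_Attribute then flags every attribute access on those variables. Bracket-style access passes silently, while legitimate attributes such as .values still show up and need manual review.

import pandas as pd

df = pd.DataFrame({"value": [1, 2, 3]})   # "df" is recorded by visit_Assign

total = df.value.sum()       # "df.value" is reported by visit_Attribute
total = df["value"].sum()    # bracket access is a Subscript node, not reported
arr = df.values              # also reported, although .values is a legitimate attribute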
Binary file added climada/test/data/LitPop_150arcsec_LUX.hdf5
Binary file not shown.
Binary file added climada/test/data/LitPop_pop_150arcsec_AUT.hdf5
Binary file not shown.
Binary file not shown.
Binary file added climada/test/data/test_haz.h5
Binary file not shown.
4 changes: 2 additions & 2 deletions climada/util/api_client.py
@@ -739,7 +739,7 @@ def purge_cache_db(local_path):
def _multi_version(datasets):
ddf = pd.DataFrame(datasets)
gdf = ddf.groupby("name").agg({"version": "nunique"})
- return list(gdf[gdf.version > 1].index)
+ return list(gdf[gdf["version"] > 1].index)

def get_hazard(
self,
@@ -1101,7 +1101,7 @@ def into_datasets_df(dataset_infos):
"""
dsdf = pd.DataFrame(dataset_infos)
ppdf = pd.DataFrame([ds.properties for ds in dataset_infos])
- dtdf = pd.DataFrame([pd.Series(dt) for dt in dsdf.data_type])
+ dtdf = pd.DataFrame([pd.Series(dt) for dt in dsdf["data_type"]])

Check warning on line 1104 in climada/util/api_client.py (Jenkins - WCR / Code Coverage): lines 742-1104 are not covered by tests.

return (
dtdf.loc[
26 changes: 13 additions & 13 deletions climada/util/coordinates.py
@@ -1582,16 +1582,16 @@ def get_admin1_geometries(countries):
for country in admin1_info:
# fill admin 1 region names and codes to GDF for single country:
gdf_tmp = gpd.GeoDataFrame(columns=gdf.columns)
- gdf_tmp.admin1_name = [record['name'] for record in admin1_info[country]]
- gdf_tmp.iso_3166_2 = [record['iso_3166_2'] for record in admin1_info[country]]
+ gdf_tmp['admin1_name'] = [record['name'] for record in admin1_info[country]]
+ gdf_tmp['iso_3166_2'] = [record['iso_3166_2'] for record in admin1_info[country]]
# With this initiation of GeoSeries in a list comprehension,
# the ability of geopandas to convert shapereader.Shape to (Multi)Polygon is exploited:
geoseries = gpd.GeoSeries([gpd.GeoSeries(shape).values[0]
for shape in admin1_shapes[country]])
gdf_tmp.geometry = list(geoseries)
# fill columns with country identifiers (admin 0):
- gdf_tmp.iso_3n = pycountry.countries.lookup(country).numeric
- gdf_tmp.iso_3a = country
+ gdf_tmp['iso_3n'] = pycountry.countries.lookup(country).numeric
+ gdf_tmp['iso_3a'] = country
gdf = pd.concat([gdf, gdf_tmp], ignore_index=True)
return gdf

@@ -2401,13 +2401,13 @@ def points_to_raster(points_df, val_names=None, res=0.0, raster_res=0.0, crs=DEF
if not val_names:
val_names = ['value']
if not res:
- res = np.abs(get_resolution(points_df.latitude.values,
- points_df.longitude.values)).min()
+ res = np.abs(get_resolution(points_df['latitude'].values,
+ points_df['longitude'].values)).min()
if not raster_res:
raster_res = res

def apply_box(df_exp):
- fun = lambda r: Point(r.longitude, r.latitude).buffer(res / 2).envelope
+ fun = lambda r: Point(r['longitude'], r['latitude']).buffer(res / 2).envelope
return df_exp.apply(fun, axis=1)

LOGGER.info('Raster from resolution %s to %s.', res, raster_res)
@@ -2431,16 +2431,16 @@ def apply_box(df_exp):

# renormalize longitude if necessary
if equal_crs(df_poly.crs, DEF_CRS):
- xmin, ymin, xmax, ymax = latlon_bounds(points_df.latitude.values,
- points_df.longitude.values)
+ xmin, ymin, xmax, ymax = latlon_bounds(points_df['latitude'].values,
+ points_df['longitude'].values)
x_mid = 0.5 * (xmin + xmax)
# we don't really change the CRS when rewrapping, so we reset the CRS attribute afterwards
df_poly = df_poly \
.to_crs({"proj": "longlat", "lon_wrap": x_mid}) \
.set_crs(DEF_CRS, allow_override=True)
else:
- xmin, ymin, xmax, ymax = (points_df.longitude.min(), points_df.latitude.min(),
- points_df.longitude.max(), points_df.latitude.max())
+ xmin, ymin, xmax, ymax = (points_df['longitude'].min(), points_df['latitude'].min(),
+ points_df['longitude'].max(), points_df['latitude'].max())

# construct raster
rows, cols, ras_trans = pts_to_raster_meta((xmin, ymin, xmax, ymax),
@@ -2666,9 +2666,9 @@ def set_df_geometry_points(df_val, scheduler=None, crs=None):
" effect and will be removed in a future version.", DeprecationWarning)

# keep the original crs if any
- crs = df_val.crs if crs is None else crs # crs might now still be None
+ crs = df_val['crs'] if crs is None else crs # crs might now still be None

- df_val.set_geometry(gpd.points_from_xy(df_val.longitude, df_val.latitude),
+ df_val.set_geometry(gpd.points_from_xy(df_val['longitude'], df_val['latitude']),
inplace=True, crs=crs)


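As a quick, hypothetical sketch (assumed column names and CRS, not part of the commit), the points_from_xy pattern touched in set_df_geometry_points looks like this with bracket-style access:

import geopandas as gpd
import pandas as pd

# Build a plain DataFrame with coordinate columns, then attach point geometries
df_val = pd.DataFrame({"longitude": [7.44, 8.54], "latitude": [46.95, 47.37]})
gdf = gpd.GeoDataFrame(df_val)
gdf.set_geometry(gpd.points_from_xy(gdf["longitude"], gdf["latitude"]),
                 inplace=True, crs="EPSG:4326")
print(gdf)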
4 changes: 2 additions & 2 deletions climada/util/finance.py
@@ -299,14 +299,14 @@ def wealth2gdp(cntry_iso, non_financial=True, ref_year=2016,
if non_financial:
try:
val = factors_all_countries[
- factors_all_countries.country_iso3 == cntry_iso]['NFW-to-GDP-ratio'].values[0]
+ factors_all_countries['country_iso3'] == cntry_iso]['NFW-to-GDP-ratio'].values[0]
except (AttributeError, KeyError, IndexError):
LOGGER.warning('No data for country, using mean factor.')
val = factors_all_countries["NFW-to-GDP-ratio"].mean()
else:
try:
val = factors_all_countries[
- factors_all_countries.country_iso3 == cntry_iso]['TW-to-GDP-ratio'].values[0]
+ factors_all_countries['country_iso3'] == cntry_iso]['TW-to-GDP-ratio'].values[0]
except (AttributeError, KeyError, IndexError):
LOGGER.warning('No data for country, using mean factor.')
val = factors_all_countries["TW-to-GDP-ratio"].mean()
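A brief aside on why these changes prefer bracket-style access (a hypothetical illustration, not part of the commit): attribute-style access silently resolves to DataFrame attributes or methods when a column name clashes with them, and it cannot express column names that are not valid Python identifiers, such as the 'NFW-to-GDP-ratio' and 'TW-to-GDP-ratio' columns above.

import pandas as pd

df = pd.DataFrame({"size": [10, 20], "NFW-to-GDP-ratio": [0.5, 0.6]})

print(df.size)                   # 4 -> DataFrame.size (number of elements), not the "size" column
print(df["size"])                # the actual "size" column
print(df["NFW-to-GDP-ratio"])    # has no attribute-style equivalent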
4 changes: 2 additions & 2 deletions climada/util/lines_polys_handler.py
@@ -445,7 +445,7 @@ def exp_geom_to_grid(exp, grid, disagg_met, disagg_val):

if disagg_val is not None:
exp = exp.copy()
- exp.gdf.value = disagg_val
+ exp.gdf['value'] = disagg_val

Check warning on line 448 in climada/util/lines_polys_handler.py (Jenkins - WCR / Code Coverage): line 448 is not covered by tests.

if ((disagg_val is None) and ('value' not in exp.gdf.columns)):
raise ValueError('There is no value column in the exposure gdf to'+
@@ -615,7 +615,7 @@ def _disagg_values_div(gdf_pnts):
gdf_disagg = gdf_pnts.copy(deep=False)

group = gdf_pnts.groupby(axis=0, level=0)
- vals = group.value.mean() / group.value.count()
+ vals = group['value'].mean() / group['value'].count()

vals = vals.reindex(gdf_pnts.index, level=0)
gdf_disagg['value'] = vals
