Remove attribute-style accesses in util
luseverin committed Aug 18, 2024
1 parent 9c6c769 commit 3e990df
Showing 9 changed files with 68 additions and 19 deletions.
49 changes: 49 additions & 0 deletions check_attribute-style_access.py
@@ -0,0 +1,49 @@
import ast
import os
import sys

class DataFrameAttributeVisitor(ast.NodeVisitor):
    def __init__(self):
        self.dataframe_vars = set()
        self.accesses = []

    def visit_Assign(self, node):
        # Check if the assigned value is an instance of DataFrame or Series
        if isinstance(node.value, ast.Call) and isinstance(node.value.func, ast.Attribute):
            if (node.value.func.attr == 'DataFrame'
                    or node.value.func.attr == 'Series'
                    or node.value.func.attr == "GeoDataFrame"
                    or node.value.func.attr == "Dataset"
                    or node.value.func.attr == "DataArray"):
                for target in node.targets:
                    if isinstance(target, ast.Name):
                        self.dataframe_vars.add(target.id)
        self.generic_visit(node)

    def visit_Attribute(self, node):
        # Check if the attribute access is on a variable that is a DataFrame or Series
        if isinstance(node.value, ast.Name) and node.value.id in self.dataframe_vars:
            self.accesses.append((node.lineno, node.col_offset, node.value.id, node.attr))
        self.generic_visit(node)

def find_dataframe_attribute_accesses(directory):
    visitor = DataFrameAttributeVisitor()
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.py'):
                file_path = os.path.join(root, file)
                with open(file_path, 'r', encoding='utf-8') as f:
                    try:
                        tree = ast.parse(f.read(), filename=file_path)
                        visitor.visit(tree)
                        if visitor.accesses:
                            print(f'In file {file_path}:')
                            for lineno, col_offset, var_name, attr in visitor.accesses:
                                print(f'  Line {lineno}, Column {col_offset}: {var_name}.{attr}')
                            visitor.accesses = []  # Reset for the next file
                    except SyntaxError as e:
                        print(f'Syntax error in file {file_path}: {e}')

# Usage
path_to_check = sys.argv[1]
find_dataframe_attribute_accesses(path_to_check)
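
For context, a minimal, hypothetical snippet (not part of the commit) showing what the checker does and does not report: visit_Assign only records variables assigned directly from a DataFrame/Series/GeoDataFrame/Dataset/DataArray constructor, and visit_Attribute then flags every attribute access on those variables. Bracket-style access passes silently, while legitimate attributes such as .values still show up and need manual review.

import pandas as pd

df = pd.DataFrame({"value": [1, 2, 3]})   # "df" is recorded by visit_Assign

total = df.value.sum()       # "df.value" is reported by visit_Attribute
total = df["value"].sum()    # bracket access is a Subscript node, not reported
arr = df.values              # also reported, although .values is a legitimate attribute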
Binary file added climada/test/data/LitPop_150arcsec_LUX.hdf5
Binary file not shown.
Binary file added climada/test/data/LitPop_pop_150arcsec_AUT.hdf5
Binary file not shown.
Binary file not shown.
Binary file added climada/test/data/test_haz.h5
Binary file not shown.
4 changes: 2 additions & 2 deletions climada/util/api_client.py
@@ -739,7 +739,7 @@ def purge_cache_db(local_path):
def _multi_version(datasets):
ddf = pd.DataFrame(datasets)
gdf = ddf.groupby("name").agg({"version": "nunique"})
- return list(gdf[gdf.version > 1].index)
+ return list(gdf[gdf["version"] > 1].index)

def get_hazard(
self,
@@ -1101,7 +1101,7 @@ def into_datasets_df(dataset_infos):
"""
dsdf = pd.DataFrame(dataset_infos)
ppdf = pd.DataFrame([ds.properties for ds in dataset_infos])
- dtdf = pd.DataFrame([pd.Series(dt) for dt in dsdf.data_type])
+ dtdf = pd.DataFrame([pd.Series(dt) for dt in dsdf["data_type"]])

Check warning on line 1104 in climada/util/api_client.py (Jenkins - WCR / Code Coverage): lines 742-1104 are not covered by tests.

return (
dtdf.loc[
26 changes: 13 additions & 13 deletions climada/util/coordinates.py
@@ -1582,16 +1582,16 @@ def get_admin1_geometries(countries):
for country in admin1_info:
# fill admin 1 region names and codes to GDF for single country:
gdf_tmp = gpd.GeoDataFrame(columns=gdf.columns)
- gdf_tmp.admin1_name = [record['name'] for record in admin1_info[country]]
- gdf_tmp.iso_3166_2 = [record['iso_3166_2'] for record in admin1_info[country]]
+ gdf_tmp['admin1_name'] = [record['name'] for record in admin1_info[country]]
+ gdf_tmp['iso_3166_2'] = [record['iso_3166_2'] for record in admin1_info[country]]
# With this initiation of GeoSeries in a list comprehension,
# the ability of geopandas to convert shapereader.Shape to (Multi)Polygon is exploited:
geoseries = gpd.GeoSeries([gpd.GeoSeries(shape).values[0]
for shape in admin1_shapes[country]])
gdf_tmp.geometry = list(geoseries)
# fill columns with country identifiers (admin 0):
- gdf_tmp.iso_3n = pycountry.countries.lookup(country).numeric
- gdf_tmp.iso_3a = country
+ gdf_tmp['iso_3n'] = pycountry.countries.lookup(country).numeric
+ gdf_tmp['iso_3a'] = country
gdf = pd.concat([gdf, gdf_tmp], ignore_index=True)
return gdf

@@ -2401,13 +2401,13 @@ def points_to_raster(points_df, val_names=None, res=0.0, raster_res=0.0, crs=DEF
if not val_names:
val_names = ['value']
if not res:
- res = np.abs(get_resolution(points_df.latitude.values,
- points_df.longitude.values)).min()
+ res = np.abs(get_resolution(points_df['latitude'].values,
+ points_df['longitude'].values)).min()
if not raster_res:
raster_res = res

def apply_box(df_exp):
- fun = lambda r: Point(r.longitude, r.latitude).buffer(res / 2).envelope
+ fun = lambda r: Point(r['longitude'], r['latitude']).buffer(res / 2).envelope
return df_exp.apply(fun, axis=1)

LOGGER.info('Raster from resolution %s to %s.', res, raster_res)
@@ -2431,16 +2431,16 @@ def apply_box(df_exp):

# renormalize longitude if necessary
if equal_crs(df_poly.crs, DEF_CRS):
- xmin, ymin, xmax, ymax = latlon_bounds(points_df.latitude.values,
- points_df.longitude.values)
+ xmin, ymin, xmax, ymax = latlon_bounds(points_df['latitude'].values,
+ points_df['longitude'].values)
x_mid = 0.5 * (xmin + xmax)
# we don't really change the CRS when rewrapping, so we reset the CRS attribute afterwards
df_poly = df_poly \
.to_crs({"proj": "longlat", "lon_wrap": x_mid}) \
.set_crs(DEF_CRS, allow_override=True)
else:
- xmin, ymin, xmax, ymax = (points_df.longitude.min(), points_df.latitude.min(),
- points_df.longitude.max(), points_df.latitude.max())
+ xmin, ymin, xmax, ymax = (points_df['longitude'].min(), points_df['latitude'].min(),
+ points_df['longitude'].max(), points_df['latitude'].max())

# construct raster
rows, cols, ras_trans = pts_to_raster_meta((xmin, ymin, xmax, ymax),
@@ -2666,9 +2666,9 @@ def set_df_geometry_points(df_val, scheduler=None, crs=None):
" effect and will be removed in a future version.", DeprecationWarning)

# keep the original crs if any
- crs = df_val.crs if crs is None else crs # crs might now still be None
+ crs = df_val['crs'] if crs is None else crs # crs might now still be None

- df_val.set_geometry(gpd.points_from_xy(df_val.longitude, df_val.latitude),
+ df_val.set_geometry(gpd.points_from_xy(df_val['longitude'], df_val['latitude']),
inplace=True, crs=crs)


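As a quick, hypothetical sketch (assumed column names and CRS, not part of the commit), the points_from_xy pattern touched in set_df_geometry_points looks like this with bracket-style access:

import geopandas as gpd
import pandas as pd

# Build a plain DataFrame with coordinate columns, then attach point geometries
df_val = pd.DataFrame({"longitude": [7.44, 8.54], "latitude": [46.95, 47.37]})
gdf = gpd.GeoDataFrame(df_val)
gdf.set_geometry(gpd.points_from_xy(gdf["longitude"], gdf["latitude"]),
                 inplace=True, crs="EPSG:4326")
print(gdf)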
4 changes: 2 additions & 2 deletions climada/util/finance.py
@@ -299,14 +299,14 @@ def wealth2gdp(cntry_iso, non_financial=True, ref_year=2016,
if non_financial:
try:
val = factors_all_countries[
- factors_all_countries.country_iso3 == cntry_iso]['NFW-to-GDP-ratio'].values[0]
+ factors_all_countries['country_iso3'] == cntry_iso]['NFW-to-GDP-ratio'].values[0]
except (AttributeError, KeyError, IndexError):
LOGGER.warning('No data for country, using mean factor.')
val = factors_all_countries["NFW-to-GDP-ratio"].mean()
else:
try:
val = factors_all_countries[
- factors_all_countries.country_iso3 == cntry_iso]['TW-to-GDP-ratio'].values[0]
+ factors_all_countries['country_iso3'] == cntry_iso]['TW-to-GDP-ratio'].values[0]
except (AttributeError, KeyError, IndexError):
LOGGER.warning('No data for country, using mean factor.')
val = factors_all_countries["TW-to-GDP-ratio"].mean()
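A brief aside on why these changes prefer bracket-style access (a hypothetical illustration, not part of the commit): attribute-style access silently resolves to DataFrame attributes or methods when a column name clashes with them, and it cannot express column names that are not valid Python identifiers, such as the 'NFW-to-GDP-ratio' and 'TW-to-GDP-ratio' columns above.

import pandas as pd

df = pd.DataFrame({"size": [10, 20], "NFW-to-GDP-ratio": [0.5, 0.6]})

print(df.size)                   # 4 -> DataFrame.size (number of elements), not the "size" column
print(df["size"])                # the actual "size" column
print(df["NFW-to-GDP-ratio"])    # has no attribute-style equivalent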
4 changes: 2 additions & 2 deletions climada/util/lines_polys_handler.py
@@ -445,7 +445,7 @@ def exp_geom_to_grid(exp, grid, disagg_met, disagg_val):

if disagg_val is not None:
exp = exp.copy()
- exp.gdf.value = disagg_val
+ exp.gdf['value'] = disagg_val

Check warning on line 448 in climada/util/lines_polys_handler.py (Jenkins - WCR / Code Coverage): line 448 is not covered by tests.

if ((disagg_val is None) and ('value' not in exp.gdf.columns)):
raise ValueError('There is no value column in the exposure gdf to'+
@@ -615,7 +615,7 @@ def _disagg_values_div(gdf_pnts):
gdf_disagg = gdf_pnts.copy(deep=False)

group = gdf_pnts.groupby(axis=0, level=0)
- vals = group.value.mean() / group.value.count()
+ vals = group['value'].mean() / group['value'].count()

vals = vals.reindex(gdf_pnts.index, level=0)
gdf_disagg['value'] = vals
