More pylint and mypy findings
delucchi-cmu committed Oct 25, 2023
1 parent d9153a8 commit ffb69a1
Showing 4 changed files with 18 additions and 17 deletions.
24 changes: 12 additions & 12 deletions src/lsdb/loaders/dataframe/dataframe_catalog_loader.py
@@ -28,7 +28,7 @@ class DataframeCatalogLoader

     def __init__(
         self,
-        df: pd.DataFrame,
+        dataframe: pd.DataFrame,
         lowest_order: int = 0,
         highest_order: int = 5,
         partition_size: float | None = None,
@@ -38,14 +38,14 @@ def __init__(
         """Initializes a DataframeCatalogLoader

         Args:
-            df (pd.Dataframe): Catalog Pandas Dataframe
+            dataframe (pd.Dataframe): Catalog Pandas Dataframe
             lowest_order (int): The lowest partition order
             highest_order (int): The highest partition order
             partition_size (float): The desired partition size, in megabytes
             threshold (int): The maximum number of data points per pixel
             **kwargs: Arguments to pass to the creation of the catalog info
         """
-        self.df = df
+        self.dataframe = dataframe
        self.lowest_order = lowest_order
        self.highest_order = highest_order
        self.threshold = self._calculate_threshold(partition_size, threshold)
@@ -66,11 +66,11 @@ def _calculate_threshold(self, partition_size: float | None = None, threshold: i
             raise ValueError("Specify only one: threshold or partition_size")
         if threshold is None:
             if partition_size is not None:
-                df_size_bytes = self.df.memory_usage().sum()
+                df_size_bytes = self.dataframe.memory_usage().sum()
                 # Round the number of partitions to the next integer, otherwise the
                 # number of pixels per partition may exceed the threshold
                 num_partitions = math.ceil(df_size_bytes / (partition_size * (1 << 20)))
-                threshold = len(self.df.index) // num_partitions
+                threshold = len(self.dataframe.index) // num_partitions
             else:
                 threshold = DataframeCatalogLoader.DEFAULT_THRESHOLD
         return threshold
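
Note: a quick sketch of the partition-size arithmetic above, with hypothetical numbers (a ~100 MiB dataframe of 1,000,000 rows is an assumption, not taken from this commit):

    import math

    df_size_bytes = 100 * (1 << 20)  # assume a 100 MiB dataframe
    partition_size = 25              # requested partition size, in MiB
    num_rows = 1_000_000             # assumed row count

    # Same formula as _calculate_threshold: round the partition count up
    # so no partition exceeds the requested size.
    num_partitions = math.ceil(df_size_bytes / (partition_size * (1 << 20)))  # -> 4
    threshold = num_rows // num_partitions                                    # -> 250_000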
@@ -107,11 +107,11 @@ def load_catalog(self) -> Catalog:
     def _set_hipscat_index(self):
         """Generates the hipscat indices for each data point and assigns
         the hipscat index column as the Dataframe index."""
-        self.df[HIPSCAT_ID_COLUMN] = compute_hipscat_id(
-            ra_values=self.df[self.catalog_info.ra_column],
-            dec_values=self.df[self.catalog_info.dec_column],
+        self.dataframe[HIPSCAT_ID_COLUMN] = compute_hipscat_id(
+            ra_values=self.dataframe[self.catalog_info.ra_column],
+            dec_values=self.dataframe[self.catalog_info.dec_column],
         )
-        self.df.set_index(HIPSCAT_ID_COLUMN, inplace=True)
+        self.dataframe.set_index(HIPSCAT_ID_COLUMN, inplace=True)

     def _compute_pixel_map(self) -> Dict[HealpixPixel, HealpixInfo]:
         """Compute object histogram and generate the mapping between
@@ -123,7 +123,7 @@ def _compute_pixel_map(self) -> Dict[HealpixPixel, HealpixInfo]:
         of objects in the HEALPix pixel, the second is the list of pixels
         """
         raw_histogram = generate_histogram(
-            self.df,
+            self.dataframe,
             highest_order=self.highest_order,
             ra_column=self.catalog_info.ra_column,
             dec_column=self.catalog_info.dec_column,
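
Note: conceptually, generate_histogram counts rows per HEALPix pixel at highest_order. A minimal sketch of that idea using healpy directly (healpy and the nested ordering here are assumptions; lsdb's generate_histogram has its own implementation):

    import healpy as hp
    import numpy as np

    nside = 2**5  # highest_order = 5
    ra = np.array([10.0, 10.1, 250.3])  # toy coordinates, in degrees
    dec = np.array([-5.0, -5.2, 41.7])

    # Map each (ra, dec) to a nested HEALPix pixel, then count per pixel.
    pixels = hp.ang2pix(nside, ra, dec, nest=True, lonlat=True)
    histogram = np.bincount(pixels, minlength=hp.nside2npix(nside))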
@@ -161,7 +161,7 @@ def _generate_dask_df_and_map(
             ddf_pixel_map[hp_pixel] = hp_pixel_index

         # Generate Dask Dataframe with original schema
-        schema = pd.DataFrame(columns=self.df.columns).astype(self.df.dtypes)
+        schema = pd.DataFrame(columns=self.dataframe.columns).astype(self.dataframe.dtypes)
         ddf = self._generate_dask_dataframe(pixel_dfs, schema)

         return ddf, ddf_pixel_map
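
Note: the schema line uses a common pandas idiom, a zero-row frame that keeps the original column names and dtypes, which serves as metadata for the Dask dataframe. A small sketch (the toy columns are illustrative):

    import pandas as pd

    source = pd.DataFrame({"ra": [1.0], "dec": [2.0], "id": [7]})

    # Empty frame with identical columns and dtypes.
    schema = pd.DataFrame(columns=source.columns).astype(source.dtypes)
    assert len(schema) == 0
    assert (schema.dtypes == source.dtypes).all()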
@@ -199,4 +199,4 @@ def _get_dataframe_for_healpix(self, pixels: List[int]) -> pd.DataFrame:
         """
         left_bound = healpix_to_hipscat_id(self.highest_order, pixels[0])
         right_bound = healpix_to_hipscat_id(self.highest_order, pixels[-1] + 1)
-        return self.df.loc[(self.df.index >= left_bound) & (self.df.index < right_bound)]
+        return self.dataframe.loc[(self.dataframe.index >= left_bound) & (self.dataframe.index < right_bound)]
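
Note: the half-open range selection works because the frame was re-indexed on the monotonically increasing hipscat ID. A toy sketch with made-up integer IDs (real bounds come from healpix_to_hipscat_id):

    import pandas as pd

    frame = pd.DataFrame({"mag": [20.1, 19.5, 21.3, 18.9]}, index=[10, 25, 40, 55])
    left_bound, right_bound = 20, 50

    # Same predicate as _get_dataframe_for_healpix: inclusive left, exclusive right.
    subset = frame.loc[(frame.index >= left_bound) & (frame.index < right_bound)]
    # -> the rows indexed 25 and 40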
8 changes: 5 additions & 3 deletions src/lsdb/loaders/dataframe/from_dataframe.py
@@ -7,7 +7,7 @@


 def from_dataframe(
-    df: pd.DataFrame,
+    dataframe: pd.DataFrame,
     lowest_order: int = 0,
     highest_order: int = 5,
     partition_size: float | None = None,
@@ -17,7 +17,7 @@ def from_dataframe(
     """Load a catalog from a Pandas Dataframe in CSV format.

     Args:
-        df (pd.Dataframe): The catalog Pandas Dataframe
+        dataframe (pd.Dataframe): The catalog Pandas Dataframe
         lowest_order (int): The lowest partition order
         highest_order (int): The highest partition order
         partition_size (float): The desired partition size, in megabytes
@@ -27,5 +27,7 @@ def from_dataframe(
     Returns:
         Catalog object loaded from the given parameters
     """
-    loader = DataframeCatalogLoader(df, lowest_order, highest_order, partition_size, threshold, **kwargs)
+    loader = DataframeCatalogLoader(
+        dataframe, lowest_order, highest_order, partition_size, threshold, **kwargs
+    )
     return loader.load_catalog()
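
Note: a minimal usage sketch of the renamed entry point. The catalog_name/catalog_type keywords are assumptions about what **kwargs forwards to the catalog info; the toy table is illustrative:

    import pandas as pd
    import lsdb

    dataframe = pd.DataFrame(
        {"ra": [10.0, 10.1], "dec": [-5.0, -5.2], "mag": [18.2, 19.7]}
    )
    catalog = lsdb.from_dataframe(
        dataframe,
        lowest_order=0,
        highest_order=5,
        catalog_name="demo",       # assumed catalog-info kwarg
        catalog_type="object",     # assumed catalog-info kwarg
    )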
2 changes: 1 addition & 1 deletion src/lsdb/loaders/hipscat/read_hipscat.py
@@ -54,7 +54,7 @@ def read_hipscat(


 def _get_dataset_class_from_catalog_info(
-    base_catalog_path: str, storage_options: dict = None
+    base_catalog_path: str, storage_options: Union[Dict[Any, Any], None] = None
 ) -> Type[Dataset]:
     base_catalog_dir = hc.io.get_file_pointer_from_path(base_catalog_path)
     catalog_info_path = hc.io.paths.get_catalog_info_pointer(base_catalog_dir)
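Note: the storage_options annotation change matches the commit's mypy cleanup. `dict = None` is an implicit Optional, which mypy rejects by default (since 0.990); spelling out the union fixes it. A sketch of the pattern (the function name is illustrative):

    from typing import Any, Dict, Union

    def load(storage_options: Union[Dict[Any, Any], None] = None) -> None:
        # Treat a missing mapping as empty.
        options = storage_options or {}
        print(sorted(options))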
1 change: 0 additions & 1 deletion tests/lsdb/loaders/dataframe/test_from_dataframe.py
@@ -2,7 +2,6 @@
 import pandas as pd
 import pytest
 from hipscat.catalog import CatalogType
-from hipscat.pixel_math import HealpixPixel
 from hipscat.pixel_tree.pixel_node_type import PixelNodeType

 import lsdb
