ci(mypy): add mypy check and adjust code for types

d33bs · d33bs · commit 8478dbdf6553 · 2024-09-16T09:27:35.000-06:00
diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
@@ -56,6 +56,28 @@ jobs:
         uses: pre-commit/action@v3.0.0
         with:
           extra_args: --all-files
+  python-type-checks:
+    # This job is used to check Python types
+    name: Python type checks
+    # Avoid fail-fast to retain output
+    strategy:
+      fail-fast: false
+    runs-on: ubuntu-22.04
+    if: github.event_name != 'schedule'
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v4
+      - name: Setup python, and check pre-commit cache
+        uses: ./.github/actions/setup-env
+        with:
+          python-version: ${{ env.TARGET_PYTHON_VERSION }}
+          cache-pre-commit: false
+          cache-venv: true
+          setup-poetry: true
+          install-deps: true
+      - name: Run mypy
+        run: |
+          poetry run mypy .
   integration-test:
     name: Pytest (Python ${{ matrix.python-version }} on ${{ matrix.os }})
     # Runs pytest on all tested versions of python and OSes
diff --git a/poetry.lock b/poetry.lock
diff --git a/pycytominer/aggregate.py b/pycytominer/aggregate.py
@@ -121,7 +121,7 @@ def aggregate(
         population_df = population_df.drop([columns_to_drop], axis="columns")
 
     if output_file is not None:
-        output(
+        return output(
             df=population_df,
             output_filename=output_file,
             output_type=output_type,
diff --git a/pycytominer/cyto_utils/DeepProfiler_processing.py b/pycytominer/cyto_utils/DeepProfiler_processing.py
@@ -7,8 +7,9 @@
 import pandas as pd
 import warnings
 
-from pycytominer import aggregate, normalize
-from pycytominer.cyto_utils import (
+# use mypy ignores below to avoid duplicate import warnings
+from pycytominer import aggregate, normalize  # type: ignore[no-redef]
+from pycytominer.cyto_utils import (  # type: ignore[no-redef]
     load_npz_features,
     load_npz_locations,
     infer_cp_features,
diff --git a/pycytominer/cyto_utils/cells.py b/pycytominer/cyto_utils/cells.py
@@ -714,7 +714,7 @@ def merge_single_cells(
         """
 
         # Load the single cell dataframe by merging on the specific linking columns
-        sc_df = ""
+        left_compartment_loaded = False
         linking_check_cols = []
         merge_suffix_rename = []
         for left_compartment in self.compartment_linking_cols:
@@ -737,7 +737,7 @@ def merge_single_cells(
                     left_compartment
                 ]
 
-                if isinstance(sc_df, str):
+                if not left_compartment_loaded:
                     sc_df = self.load_compartment(compartment=left_compartment)
 
                     if compute_subsample:
@@ -752,6 +752,8 @@ def merge_single_cells(
                             sc_df, how="left", on=subset_logic_df.columns.tolist()
                         ).reindex(sc_df.columns, axis="columns")
 
+                    left_compartment_loaded = True
+
                 sc_df = sc_df.merge(
                     self.load_compartment(compartment=right_compartment),
                     left_on=[*self.merge_cols, left_link_col],
@@ -804,11 +806,13 @@ def merge_single_cells(
 
             normalize_args["features"] = features
 
-            sc_df = normalize(profiles=sc_df, **normalize_args)
+            # ignore mypy warnings below as these reference root package imports
+            sc_df = normalize(profiles=sc_df, **normalize_args)  # type: ignore[operator]
 
         # In case platemap metadata is provided, use pycytominer.annotate for metadata
         if platemap is not None:
-            sc_df = annotate(
+            # ignore mypy warnings below as these reference root package imports
+            sc_df = annotate(  # type: ignore[operator]
                 profiles=sc_df, platemap=platemap, output_file=None, **kwargs
             )
 
diff --git a/pycytominer/cyto_utils/collate.py b/pycytominer/cyto_utils/collate.py
@@ -131,7 +131,7 @@ def collate(
         with sqlite3.connect(cache_backend_file, isolation_level=None) as connection:
             cursor = connection.cursor()
             if column:
-                if print:
+                if printtoscreen:
                     print(f"Adding a Metadata_Plate column based on column {column}")
                 cursor.execute("ALTER TABLE Image ADD COLUMN Metadata_Plate TEXT;")
                 cursor.execute(f"UPDATE image SET Metadata_Plate ={column};")
diff --git a/pycytominer/cyto_utils/output.py b/pycytominer/cyto_utils/output.py
@@ -12,10 +12,10 @@
 def output(
     df: pd.DataFrame,
     output_filename: str,
-    output_type: str = "csv",
+    output_type: Optional[str] = "csv",
     sep: str = ",",
     float_format: Optional[str] = None,
-    compression_options: Union[str, Dict] = {"method": "gzip", "mtime": 1},
+    compression_options: Optional[Union[str, Dict]] = {"method": "gzip", "mtime": 1},
     **kwargs,
 ):
     """Given an output file and compression options, write file to disk
@@ -79,6 +79,10 @@ def output(
     )
     """
 
+    # ensure a default output type
+    if output_type is None:
+        output_type = "csv"
+
     if output_type == "csv":
         compression_options = set_compression_method(compression=compression_options)
 
@@ -98,7 +102,7 @@ def output(
     return output_filename
 
 
-def set_compression_method(compression: Union[str, Dict]):
+def set_compression_method(compression: Optional[Union[str, Dict]]):
     """Set the compression options
 
     Parameters
diff --git a/pyproject.toml b/pyproject.toml
@@ -73,6 +73,7 @@ pytest-cov = "^4.1.0"
 pre-commit = ">=3.3.2"
 commitizen = "^3.12.0"
 ruff = "^0.3.4"
+mypy = "^1.11.2"
 
 [tool.poetry.group.docs]
 optional = true
@@ -177,6 +178,18 @@ preview = true
 [tool.pytest.ini_options]
 testpaths = "tests"
 
+[tool.mypy]
+# ignores optionally added type packages
+ignore_missing_imports = true
+# ignores redefinition of variable labels to new types
+allow_redefinition = true
+exclude = [
+    # ignore notebook-based walkthroughs
+    "walkthroughs",
+    # ignore tests dir
+    "tests"
+]
+
 [build-system]
 requires = ["poetry-core>=1.7.0", "poetry-dynamic-versioning>=1.1.0"]
 build-backend = "poetry_dynamic_versioning.backend"
diff --git a/tests/test_cyto_utils/test_output.py b/tests/test_cyto_utils/test_output.py
@@ -56,6 +56,20 @@ def test_output_default():
         result, DATA_DF, check_names=False, check_exact=False, atol=1e-3
     )
 
+    # test with an output_type of None
+    output_result = output(
+        df=DATA_DF,
+        output_filename=output_filename,
+        compression_options=TEST_COMPRESSION_OPTIONS,
+        float_format=None,
+        output_type=None,
+    )
+    result = pd.read_csv(output_result)
+
+    pd.testing.assert_frame_equal(
+        result, DATA_DF, check_names=False, check_exact=False, atol=1e-3
+    )
+
 
 def test_output_tsv():
     # Test input filename of writing a tab separated file