diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a2a5f89..45bfce3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,51 @@ +## v1.1.0 (2024-03-05) + +[Detailed release notes](https://github.com/cytomining/pycytominer/releases/tag/v1.1.0) + +### Fix + +- **build**: fix build versioning +- simplify Spherize transform – epsilon to regularize instead of clip, add additional checks (see #320) + +### Refactor + +- **docs**: apply flake8-builtins checks +- **dev**: apply pyflakes checks +- **dev**: apply flake8-simplify checks + +### Test + +- add flake8-bandit ignores +- add clarifying comments for cell_loc test +- update cell_loc s3 paths and testing + +### Docs + +- update Readme with Citation section +- **template**: PR template attribution to comment +- **changelog**: add commitizen template +- add description of ruff linting/formatting +- reorganize style guide + +### CI + +- add versioned artifact build action +- **integration-test**: add explicit artifact retention time +- add pygrep-hooks and flake8-20202 checks + +### Style + +- swap out black for ruff-format +- apply pyupgrade checks +- **devcontainer**: add ruff extension +- apply ruff native checks +- apply pycodestyle checks +- add flake8-comprehensions checks + +### Build + +- **poetry**: make dev dep group optional + ## v1.0.1 (2023-11-07) [Detailed Release Notes](https://github.com/cytomining/pycytominer/releases/tag/v1.0.1) diff --git a/CITATION.cff b/CITATION.cff index fa0b28dd..863ca7a9 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -85,7 +85,7 @@ authors: orcid: https://orcid.org/0000-0002-0503-9348 title: "Reproducible image-based profiling with Pycytominer" # This version is updated using `cz bump` command -version: "1.0.1" +version: "1.1.0" license: BSD 3-Clause License repository-code: "https://github.com/cytomining/pycytominer" doi: 10.48550/arXiv.2311.13417 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d4af6316..5899dbcb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -179,9 
+179,9 @@ Creating a new release includes the following steps: 1. Create a new branch from `main` for the release (e.g. `release-v1.0.0`) 2. Review the [commit history](https://github.com/cytomining/pycytominer/compare) from the last release and check whether it includes commits that don't follow the [conventional commit standard](https://www.conventionalcommits.org/en/v1.0.0/#summary). If all changes follow conventional commits, skip to step 5. -3. Run the command `cz bump --files-only` to update the version number in `CITATION.cff` and `pyproject.toml:tool.commitizen` and generate the draft changelog. +3. Run the command `poetry run cz bump --files-only` to update the version number in `CITATION.cff` and `pyproject.toml:tool.commitizen` and generate the draft changelog. 4. Review the changes to `CHANGELOG.md`. If necessary, add descriptions of missing changes and modify descriptions to match conventional commits standard. -5. `git add` any manual changes and run `cz bump` to create the release commit. +5. `git add` any manual changes and run `poetry run cz bump` to create the release commit. Push the changes to the release branch. 6. Create a pull request for the release branch into `main`. 7. Request a review from another maintainer. 
diff --git a/dev_tools/commitizen/CHANGELOG.md.j2 b/dev_tools/commitizen/CHANGELOG.md.j2 index eb4f83a3..fb921405 100644 --- a/dev_tools/commitizen/CHANGELOG.md.j2 +++ b/dev_tools/commitizen/CHANGELOG.md.j2 @@ -4,7 +4,7 @@ ## {{ entry.version }}{% if entry.date %} ({{ entry.date }}){% endif %} {# Add link to the detailed GitHub release notes #} -[Detailed Release Notes](https://github.com/cytomining/pycytominer/releases/tag/{{ entry.version }}) +[Detailed release notes](https://github.com/cytomining/pycytominer/releases/tag/{{ entry.version }}) {% for change_key, changes in entry.changes.items() %} diff --git a/pyproject.toml b/pyproject.toml index d39c9cc1..e8d8dcfd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,7 +101,7 @@ files = ["pycytominer/__about__.py"] [tool.commitizen] # This version is used for changelog tracking and is updated using `cz bump` -version = "1.0.1" +version = "1.1.0" name = "cz_conventional_commits" tag_format = "v$version" version_scheme = "pep440" diff --git a/tests/test_cyto_utils/conftest.py b/tests/test_cyto_utils/conftest.py index a6c78821..daf599c4 100644 --- a/tests/test_cyto_utils/conftest.py +++ b/tests/test_cyto_utils/conftest.py @@ -44,7 +44,7 @@ def fixture_metadata_input_file_s3() -> str: """ Provide a metadata input file for cell_locations test data """ - return "s3://cellpainting-gallery/test-cpg0016-jump/source_4/workspace/load_data_csv/2021_08_23_Batch12/BR00126114/test_BR00126114_load_data_with_illum.parquet" + return "s3://cellpainting-gallery/cpg0016-jump/source_4/workspace/load_data_csv/2021_08_23_Batch12/BR00126114/load_data_with_illum.parquet" @pytest.fixture(name="single_cell_input_file_s3") @@ -52,7 +52,7 @@ def fixture_single_cell_input_file_s3() -> str: """ Provide a single cell input file for cell_locations test data """ - return "s3://cellpainting-gallery/test-cpg0016-jump/source_4/workspace/backend/2021_08_23_Batch12/BR00126114/test_BR00126114.sqlite" + return 
"s3://cellpainting-gallery/cpg0016-jump/source_4/workspace/backend/2021_08_23_Batch12/BR00126114/BR00126114.sqlite" @pytest.fixture(name="metadata_input_dataframe") @@ -113,27 +113,3 @@ def fixture_cell_loc_obj3( metadata_input=metadata_input_file_s3, single_cell_input=single_cell_input_file_s3, ) - - -@pytest.fixture(name="cell_loc1") -def fixture_cell_loc1(cell_loc_obj1: CellLocation) -> pd.DataFrame: - """ - Provide the output of running CellLocation.add_cell_location - """ - return cell_loc_obj1.add_cell_location() - - -@pytest.fixture(name="cell_loc2") -def fixture_cell_loc2(cell_loc_obj2: CellLocation) -> pd.DataFrame: - """ - Provide the output of running CellLocation.add_cell_location - """ - return cell_loc_obj2.add_cell_location() - - -@pytest.fixture(name="cell_loc3") -def fixture_cell_loc3(cell_loc_obj3: CellLocation) -> pd.DataFrame: - """ - Provide the output of running CellLocation.add_cell_location - """ - return cell_loc_obj3.add_cell_location() diff --git a/tests/test_cyto_utils/test_cell_locations.py b/tests/test_cyto_utils/test_cell_locations.py index 4426c801..54f8db1d 100644 --- a/tests/test_cyto_utils/test_cell_locations.py +++ b/tests/test_cyto_utils/test_cell_locations.py @@ -2,22 +2,50 @@ import pandas as pd import pytest -import sqlalchemy -from typing import Type +from typing import Type, List +from pycytominer.cyto_utils.cell_locations import CellLocation from _pytest.fixtures import FixtureRequest -@pytest.mark.parametrize("cell_loc", ["cell_loc1", "cell_loc2", "cell_loc3"]) +def get_metadata_input_dataframe(cell_loc: CellLocation) -> pd.DataFrame: + """ + Returns the metadata input of a CellLocation object as a dataframe: string paths + are read as parquet (anonymously for s3://), anything else is returned as-is. 
+ """ + + return ( + pd.read_parquet( + cell_loc.metadata_input, + # request anonymous access when the input is an s3 path + storage_options={"anon": True} + if isinstance(cell_loc.metadata_input, str) + and cell_loc.metadata_input.startswith("s3://") + else None, + ) + if isinstance(cell_loc.metadata_input, str) + else cell_loc.metadata_input + ) + + +@pytest.mark.parametrize( + "cell_loc_param", + [ + "cell_loc_obj1", + "cell_loc_obj2", + "cell_loc_obj3", + ], +) def test_output_shape_and_required_columns( - cell_loc: str, - metadata_input_dataframe: pd.DataFrame, + cell_loc_param: List[str], request: Type[FixtureRequest], ): """ This tests the shape of the output from CellLocation class and verifies that the required columns are present """ - cell_loc = request.getfixturevalue(cell_loc) + cls_cell_loc = request.getfixturevalue(cell_loc_param) + cell_loc = cls_cell_loc.add_cell_location() + metadata_input_dataframe = get_metadata_input_dataframe(cell_loc=cls_cell_loc) # check the shape of the data assert cell_loc.shape == ( @@ -31,17 +59,25 @@ def test_output_shape_and_required_columns( assert "Nuclei_Location_Center_Y" in cell_loc["CellCenters"][0][0] -@pytest.mark.parametrize("cell_loc", ["cell_loc1", "cell_loc2", "cell_loc3"]) +@pytest.mark.parametrize( + "cell_loc_param", + [ + "cell_loc_obj1", + "cell_loc_obj2", + "cell_loc_obj3", + ], +) def test_output_value_correctness( - cell_loc: str, - metadata_input_dataframe: pd.DataFrame, - single_cell_input_file: str, + cell_loc_param: List[str], request: Type[FixtureRequest], ): """ This tests the correctness of the values in the output from CellLocation class by comparing the values in the output to the values in the input """ - cell_loc = request.getfixturevalue(cell_loc) + + cls_cell_loc = request.getfixturevalue(cell_loc_param) + cell_loc = cls_cell_loc.add_cell_location() + metadata_input_dataframe = get_metadata_input_dataframe(cell_loc=cls_cell_loc) # if we restrict the columns of cell_loc to the ones in 
metadata_input_dataframe, we should get the same dataframe assert ( @@ -50,7 +86,8 @@ def test_output_value_correctness( .equals(metadata_input_dataframe.reset_index(drop=True)) ) - engine = sqlalchemy.create_engine(f"sqlite:///{single_cell_input_file}") + # take the engine from the CellLocation object (second item returned by the helper) + _, engine = cls_cell_loc._get_single_cell_engine() nuclei_query = "SELECT ImageNumber, ObjectNumber, Nuclei_Location_Center_X, Nuclei_Location_Center_Y FROM Nuclei;" @@ -59,7 +96,9 @@ def test_output_value_correctness( # get the values in the Nuclear_Location_Center_X and Nuclear_Location_Center_Y columns # for the rows in nuclei_df that have ImageNumber == 1 - nuclei_df_row1 = nuclei_df[nuclei_df["ImageNumber"] == "1"] + # note: we cast ImageNumber to "int64" so every cell_loc_obj fixture is compared the same way + # (per the original note, some provide ImageNumber as object dtype, others as int64) + nuclei_df_row1 = nuclei_df[nuclei_df["ImageNumber"].astype("int64") == 1] observed_x = [x["Nuclei_Location_Center_X"] for x in cell_loc.CellCenters[0]] observed_y = [x["Nuclei_Location_Center_Y"] for x in cell_loc.CellCenters[0]]