Commit 4748df5

fix(cell_locations): modify tests for data updates (#505)
1 parent 35d54c6 commit 4748df5

File tree (2 files changed, +37 -16 lines changed)

  tests/test_cyto_utils/conftest.py
  tests/test_cyto_utils/test_cell_locations.py

tests/test_cyto_utils/conftest.py (+1 -1)
@@ -46,7 +46,7 @@ def fixture_metadata_input_file_s3() -> str:
     """
     Provide a metadata input file for cell_locations test data
     """
-    return "s3://cellpainting-gallery/cpg0016-jump/source_4/workspace/load_data_csv/2021_08_23_Batch12/BR00126114/load_data_with_illum.parquet"
+    return "s3://cellpainting-gallery/cpg0016-jump/source_4/workspace/load_data_csv/2021_08_23_Batch12/BR00126114/load_data_with_illum.csv"
 
 
 @pytest.fixture(name="single_cell_input_file_s3")
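
Note on the fixture change above: the object it points at is now a CSV rather than parquet, and it can be read straight from the public cellpainting-gallery bucket with anonymous access. A minimal sketch (assumes pandas with fsspec and s3fs installed so s3:// paths and storage_options are supported):

import pandas as pd

# anonymous read of the public CSV object referenced by the fixture
path = (
    "s3://cellpainting-gallery/cpg0016-jump/source_4/workspace/load_data_csv/"
    "2021_08_23_Batch12/BR00126114/load_data_with_illum.csv"
)
load_data = pd.read_csv(path, storage_options={"anon": True})
print(load_data.shape)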

tests/test_cyto_utils/test_cell_locations.py (+36 -15)
@@ -13,18 +13,35 @@ def get_metadata_input_dataframe(cell_loc: CellLocation) -> pd.DataFrame:
     from a CellLocation object.
     """
 
-    return (
-        pd.read_parquet(
-            cell_loc.metadata_input,
-            # set storage options if we have an s3 path
-            storage_options={"anon": True}
-            if isinstance(cell_loc.metadata_input, str)
-            and cell_loc.metadata_input.startswith("s3://")
-            else None,
+    # return a dataframe if it is already a dataframe
+    if isinstance(cell_loc.metadata_input, pd.DataFrame):
+        return cell_loc.metadata_input
+
+    # try to process a string-based path
+    if isinstance(cell_loc.metadata_input, str):
+        storage_opts = (
+            {"anon": True} if cell_loc.metadata_input.startswith("s3://") else None
         )
-        if isinstance(cell_loc.metadata_input, str)
-        else cell_loc.metadata_input
-    )
+        return (
+            # read from parquet if we have a parquet object path
+            pd.read_parquet(
+                path=cell_loc.metadata_input,
+                # set storage options if we have an s3 path
+                storage_options=storage_opts,
+            )
+            if cell_loc.metadata_input.endswith(".parquet")
+            # read from csv if we have a csv object path
+            else (
+                pd.read_csv(
+                    filepath_or_buffer=cell_loc.metadata_input,
+                    # set storage options if we have an s3 path
+                    storage_options=storage_opts,
+                )
+            )
+        )
+    else:
+        # otherwise raise an error as we don't have a supported format
+        raise ValueError("Unsupported metadata_input type")
 
 
 @pytest.mark.parametrize(
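
For reference, the dispatch added in the hunk above can be read as a small standalone pattern: pass DataFrames through untouched, pick a reader by file extension, and only enable anonymous S3 access for s3:// paths. The sketch below illustrates that pattern and is not code from the commit; the function name and example inputs are hypothetical.

import pandas as pd

def load_table(source):
    # pass an in-memory DataFrame through unchanged
    if isinstance(source, pd.DataFrame):
        return source
    if isinstance(source, str):
        # anonymous credentials only make sense for public s3:// objects
        storage_opts = {"anon": True} if source.startswith("s3://") else None
        if source.endswith(".parquet"):
            return pd.read_parquet(source, storage_options=storage_opts)
        if source.endswith(".csv"):
            return pd.read_csv(source, storage_options=storage_opts)
    raise ValueError("Unsupported metadata_input type")

# exercising the DataFrame branch; the commented-out paths are placeholders
frame = load_table(pd.DataFrame({"Metadata_Site": [1, 2]}))
# load_table("load_data_with_illum.csv")
# load_table("s3://cellpainting-gallery/.../load_data_with_illum.parquet")
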
@@ -80,11 +97,15 @@ def test_output_value_correctness(
     cell_loc = cls_cell_loc.add_cell_location()
     metadata_input_dataframe = get_metadata_input_dataframe(cell_loc=cls_cell_loc)
 
+    # Cast cell_loc columns to the data types of metadata_input_dataframe columns
+    # (observed metadata_site as having different types)
+    for col in metadata_input_dataframe.columns:
+        cell_loc[col] = cell_loc[col].astype(metadata_input_dataframe[col].dtype)
+
     # if we restrict the columns of cell_loc to the ones in metadata_input_dataframe, we should get the same dataframe
-    assert (
-        cell_loc[metadata_input_dataframe.columns]
-        .reset_index(drop=True)
-        .equals(metadata_input_dataframe.reset_index(drop=True))
+    pd.testing.assert_frame_equal(
+        cell_loc[metadata_input_dataframe.columns].reset_index(drop=True),
+        metadata_input_dataframe.reset_index(drop=True),
     )
 
     # gather an engine from the cell_loc class
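
The second hunk does two things: it aligns dtypes before comparing (the commit comment notes metadata_site was observed with differing types), and it swaps a bare assert on DataFrame.equals for pd.testing.assert_frame_equal, which reports the offending column and attribute when the frames differ. A minimal sketch of the effect; the int32/int64 mismatch and column name below are illustrative assumptions, not taken from the data:

import pandas as pd

left = pd.DataFrame({"Metadata_Site": [1, 2, 3]}).astype({"Metadata_Site": "int32"})
right = pd.DataFrame({"Metadata_Site": [1, 2, 3]})  # int64 by default

# without the cast below, assert_frame_equal raises and names the column
# whose "dtype" attribute differs; DataFrame.equals would just return False
for col in right.columns:
    left[col] = left[col].astype(right[col].dtype)

pd.testing.assert_frame_equal(
    left.reset_index(drop=True),
    right.reset_index(drop=True),
)  # passes once dtypes are aligned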
