def add_nancodes(df, signal):
    """Attach missingness-code columns to ``df`` and return it.

    Three columns are written directly onto the passed frame:
    ``missing_val``, ``missing_se`` and ``missing_sample_size``.

    * ``missing_val`` starts as ``Nans.NOT_MISSING``.
    * ``missing_se`` and ``missing_sample_size`` start as
      ``Nans.NOT_MISSING`` when ``signal`` is ``"pcr_tests_positive"``
      and as ``Nans.NOT_APPLICABLE`` for any other signal.
    * Rows whose ``val`` is null are re-coded to ``Nans.UNKNOWN``; for
      ``"pcr_tests_positive"`` the same is done for null ``se`` and
      ``sample_size`` entries.

    The frame is modified in place; the same object is returned so the
    call can be used in an assignment.
    """
    carries_stats = signal == "pcr_tests_positive"
    stats_default = Nans.NOT_MISSING if carries_stats else Nans.NOT_APPLICABLE

    # Default codes, written in the order the columns should appear.
    df["missing_val"] = Nans.NOT_MISSING
    df["missing_se"] = stats_default
    df["missing_sample_size"] = stats_default

    # (data column, code column) pairs whose nulls get flagged as unknown.
    # The se / sample_size columns are only inspected for the signal that
    # actually reports them.
    to_check = [("val", "missing_val")]
    if carries_stats:
        to_check.append(("se", "missing_se"))
        to_check.append(("sample_size", "missing_sample_size"))

    for data_col, code_col in to_check:
        is_null = df[data_col].isnull()
        df.loc[is_null, code_col] = Nans.UNKNOWN

    return df
@@ -56,9 +77,11 @@ def run_module(params): # Perform geo aggregations and export to receiving for geo_res in GEO_RESOLUTIONS: print(f"Processing {geo_res}") + # breakpoint() df = geo_map(df_county_testing, geo_res) # Export 'pcr_specimen_positivity_rate' + df = add_nancodes(df, "pcr_tests_positive") exported_csv_dates = create_export_csv( df, export_dir=export_dir, @@ -69,6 +92,7 @@ def run_module(params): df["val"] = df["sample_size"] df["sample_size"] = np.nan df["se"] = np.nan + df = add_nancodes(df, "pcr_tests_total") exported_csv_dates = create_export_csv( df, export_dir=export_dir, diff --git a/covid_act_now/tests/test_run.py b/covid_act_now/tests/test_run.py index 7cec2e1dc3..e3fb9a1c13 100644 --- a/covid_act_now/tests/test_run.py +++ b/covid_act_now/tests/test_run.py @@ -21,6 +21,7 @@ def test_output_files(self, clean_receiving_dir): run_module(self.PARAMS) csv_files = set(listdir("receiving")) csv_files.discard(".gitignore") + today = pd.Timestamp.today().date().strftime("%Y%m%d") expected_files = set() for signal in SIGNALS: @@ -30,7 +31,11 @@ def test_output_files(self, clean_receiving_dir): # All output files exist assert csv_files == expected_files + expected_columns = [ + "geo_id", "val", "se", "sample_size", + "missing_val", "missing_se", "missing_sample_size" + ] # All output files have correct columns for csv_file in csv_files: df = pd.read_csv(join("receiving", csv_file)) - assert (df.columns.values == ["geo_id", "val", "se", "sample_size"]).all() + assert (df.columns.values == expected_columns).all()