Skip to content

Commit

Permalink
NANs for CAN:
Browse files Browse the repository at this point in the history
* Add missing columns; allow NaN values through
  • Loading branch information
dshemetov committed Apr 27, 2021
1 parent 9b75e07 commit 65e343f
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 1 deletion.
24 changes: 24 additions & 0 deletions covid_act_now/delphi_covid_act_now/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,33 @@
from delphi_utils import (
create_export_csv,
S3ArchiveDiffer,
Nans
)

from .constants import GEO_RESOLUTIONS, SIGNALS
from .geo import geo_map
from .pull import load_data, extract_testing_metrics

def add_nancodes(df, signal):
    """Attach missingness-code columns to the dataframe.

    Three columns are written, in this order: ``missing_val``, ``missing_se``
    and ``missing_sample_size``.  The dataframe is modified in place and the
    same object is returned.

    Parameters
    ----------
    df:
        Dataframe with a ``val`` column; for the ``"pcr_tests_positive"``
        signal the ``se`` and ``sample_size`` columns are read as well.
    signal:
        Signal name.  Only ``"pcr_tests_positive"`` gets per-row codes for
        ``se`` and ``sample_size``; every other signal has those two columns
        marked ``Nans.NOT_APPLICABLE`` throughout.

    Returns
    -------
    The dataframe that was passed in, with the three code columns set.
    """
    reports_se_and_sample_size = signal == "pcr_tests_positive"
    if reports_se_and_sample_size:
        auxiliary_default = Nans.NOT_MISSING
    else:
        auxiliary_default = Nans.NOT_APPLICABLE

    # Start from the default codes; assignment order fixes the column order.
    df["missing_val"] = Nans.NOT_MISSING
    df["missing_se"] = auxiliary_default
    df["missing_sample_size"] = auxiliary_default

    # Any null value is flagged as unknown.
    df.loc[df["val"].isnull(), "missing_val"] = Nans.UNKNOWN
    if reports_se_and_sample_size:
        auxiliary_columns = (
            ("se", "missing_se"),
            ("sample_size", "missing_sample_size"),
        )
        for value_column, code_column in auxiliary_columns:
            df.loc[df[value_column].isnull(), code_column] = Nans.UNKNOWN

    return df

def run_module(params):
"""
Run the CAN testing metrics indicator.
Expand Down Expand Up @@ -56,9 +77,11 @@ def run_module(params):
# Perform geo aggregations and export to receiving
for geo_res in GEO_RESOLUTIONS:
print(f"Processing {geo_res}")
# breakpoint()
df = geo_map(df_county_testing, geo_res)

# Export 'pcr_specimen_positivity_rate'
df = add_nancodes(df, "pcr_tests_positive")
exported_csv_dates = create_export_csv(
df,
export_dir=export_dir,
Expand All @@ -69,6 +92,7 @@ def run_module(params):
df["val"] = df["sample_size"]
df["sample_size"] = np.nan
df["se"] = np.nan
df = add_nancodes(df, "pcr_tests_total")
exported_csv_dates = create_export_csv(
df,
export_dir=export_dir,
Expand Down
7 changes: 6 additions & 1 deletion covid_act_now/tests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def test_output_files(self, clean_receiving_dir):
run_module(self.PARAMS)
csv_files = set(listdir("receiving"))
csv_files.discard(".gitignore")
today = pd.Timestamp.today().date().strftime("%Y%m%d")

expected_files = set()
for signal in SIGNALS:
Expand All @@ -30,7 +31,11 @@ def test_output_files(self, clean_receiving_dir):
# All output files exist
assert csv_files == expected_files

expected_columns = [
"geo_id", "val", "se", "sample_size",
"missing_val", "missing_se", "missing_sample_size"
]
# All output files have correct columns
for csv_file in csv_files:
df = pd.read_csv(join("receiving", csv_file))
assert (df.columns.values == ["geo_id", "val", "se", "sample_size"]).all()
assert (df.columns.values == expected_columns).all()

0 comments on commit 65e343f

Please sign in to comment.