diff --git a/resources/ResourceFile_RTI/RTI_emulator.pkl b/resources/ResourceFile_RTI/RTI_emulator.pkl new file mode 100644 index 0000000000..5312c280f5 --- /dev/null +++ b/resources/ResourceFile_RTI/RTI_emulator.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bc5d14a555d635888a7fb760158493c79969281ed574aa871d1296e1bdb4ec9 +size 35333224 diff --git a/resources/ResourceFile_RTI/parameter_values.csv b/resources/ResourceFile_RTI/parameter_values.csv index 1167d1e7b5..98b99652b7 100644 --- a/resources/ResourceFile_RTI/parameter_values.csv +++ b/resources/ResourceFile_RTI/parameter_values.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ca079273006733c1c5f232de53d3ecc88bc8d1f6a9b6051dea7da29cb9df3cd -size 5049 +oid sha256:80e33bccc9ecd960e8c9a260ae0582214fb70ef32d6295dccaa0ca717869e489 +size 5229 diff --git a/src/scripts/healthsystem/impact_of_actual_vs_funded/analysis_extract_data.py b/src/scripts/healthsystem/impact_of_actual_vs_funded/analysis_extract_data.py new file mode 100644 index 0000000000..4f2f0b8f35 --- /dev/null +++ b/src/scripts/healthsystem/impact_of_actual_vs_funded/analysis_extract_data.py @@ -0,0 +1,317 @@ +"""Produce plots to show the health impact (deaths, dalys) each the healthcare system (overall health impact) when +running under different MODES and POLICIES (scenario_impact_of_actual_vs_funded.py)""" + +# short tclose -> ideal case +# long tclose -> status quo +import argparse +from pathlib import Path +from typing import Tuple + +import numpy as np +import pandas as pd +from matplotlib import pyplot as plt +from matplotlib.colors import Normalize +from scipy.optimize import curve_fit + +from tlo import Date +from tlo.analysis.utils import extract_results, summarize +from tlo.analysis.life_expectancy import get_life_expectancy_estimates + + +# Range of years considered +min_year = 2010 +max_year = 2040 + + +def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ): + """Produce standard set of plots describing the effect of each TREATMENT_ID. + - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. + - We estimate the draw on healthcare system resources as the FEWER appointments when that treatment does not occur. + """ + + TARGET_PERIOD = (Date(min_year, 1, 1), Date(max_year, 1, 1)) + + # Definitions of general helper functions + make_graph_file_name = lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 + + def target_period() -> str: + """Returns the target period as a string of the form YYYY-YYYY""" + return "-".join(str(t.year) for t in TARGET_PERIOD) + + def get_parameter_names_from_scenario_file() -> Tuple[str]: + """Get the tuple of names of the scenarios from `Scenario` class used to create the results.""" + from scripts.healthsystem.impact_of_actual_vs_funded.scenario_impact_of_actual_vs_funded import ( + ImpactOfHealthSystemMode, + ) + e = ImpactOfHealthSystemMode() + return tuple(e._scenarios.keys()) + + def get_num_deaths(_df): + """Return total number of Deaths (total within the TARGET_PERIOD) + """ + return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)])) + + def get_num_dalys(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + def get_num_dalys_by_cause(_df): + """Return number of DALYs by cause by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + + def set_param_names_as_column_index_level_0(_df): + """Set the columns index (level 0) as the param_names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] + assert len(names_of_cols_level0) == len(_df.columns.levels[0]) + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df + + def find_difference_relative_to_comparison(_ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. + The comparison is `X - COMPARISON`.""" + return _ser \ + .unstack(level=0) \ + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ + .drop(columns=([comparison] if drop_comparison else [])) \ + .stack() + + + def get_counts_of_hsi_by_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + + year_target = 2023 + def get_counts_of_hsi_by_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).dt.year ==year_target, 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + + def get_counts_of_hsi_by_short_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + def get_counts_of_hsi_by_short_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id_by_year(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + + # Obtain parameter names for this scenario file + param_names = get_parameter_names_from_scenario_file() + print(param_names) + + # ================================================================================================ + # TIME EVOLUTION OF TOTAL DALYs + # Plot DALYs averted compared to the ``No Policy'' policy + + year_target = 2023 # This global variable will be passed to custom function + def get_num_dalys_by_year(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + this_min_year = 2010 + for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'index_original']) + concatenated_df = concatenated_df.reset_index(level='index_original',drop=True) + dalys_by_year = concatenated_df + print(dalys_by_year) + dalys_by_year.to_csv('ConvertedOutputs/Total_DALYs_with_time.csv', index=True) + + # ================================================================================================ + # Print population under each scenario + pop_model = extract_results(results_folder, + module="tlo.methods.demography", + key="population", + column="total", + index="date", + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + pop_model.index = pop_model.index.year + pop_model = pop_model[(pop_model.index >= this_min_year) & (pop_model.index <= max_year)] + print(pop_model) + assert dalys_by_year.index.equals(pop_model.index) + assert all(dalys_by_year.columns == pop_model.columns) + pop_model.to_csv('ConvertedOutputs/Population_with_time.csv', index=True) + + # ================================================================================================ + # DALYs BROKEN DOWN BY CAUSES AND YEAR + # DALYs by cause per year + # %% Quantify the health losses associated with all interventions combined. + + year_target = 2023 # This global variable will be passed to custom function + def get_num_dalys_by_year_and_cause(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + this_min_year = 2010 + for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_year_and_cause, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year #summarize(num_dalys_by_year) + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + + df_total = concatenated_df + df_total.to_csv('ConvertedOutputs/DALYS_by_cause_with_time.csv', index=True) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + for year in range(min_year, max_year+1): + year_target = year + + hsi_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_delivered_by_year + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + HSI_ran_by_year = concatenated_df + + del ALL + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + for year in range(min_year, max_year+1): + year_target = year + + hsi_not_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Never_ran_HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_not_delivered_by_year + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + HSI_never_ran_by_year = concatenated_df + + HSI_never_ran_by_year = HSI_never_ran_by_year.fillna(0) #clean_df( + HSI_ran_by_year = HSI_ran_by_year.fillna(0) + HSI_total_by_year = HSI_ran_by_year.add(HSI_never_ran_by_year, fill_value=0) + HSI_ran_by_year.to_csv('ConvertedOutputs/HSIs_ran_by_area_with_time.csv', index=True) + HSI_never_ran_by_year.to_csv('ConvertedOutputs/HSIs_never_ran_by_area_with_time.csv', index=True) + print(HSI_ran_by_year) + print(HSI_never_ran_by_year) + print(HSI_total_by_year) + +if __name__ == "__main__": + rfp = Path('resources') + + parser = argparse.ArgumentParser( + description="Produce plots to show the impact each set of treatments", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--output-path", + help=( + "Directory to write outputs to. If not specified (set to None) outputs " + "will be written to value of --results-path argument." + ), + type=Path, + default=None, + required=False, + ) + parser.add_argument( + "--resources-path", + help="Directory containing resource files", + type=Path, + default=Path('resources'), + required=False, + ) + parser.add_argument( + "--results-path", + type=Path, + help=( + "Directory containing results from running " + "src/scripts/healthsystem/impact_of_actual_vs_funded/scenario_impact_of_actual_vs_funded.py " + ), + default=None, + required=False + ) + args = parser.parse_args() + assert args.results_path is not None + results_path = args.results_path + + output_path = results_path if args.output_path is None else args.output_path + + apply( + results_folder=results_path, + output_folder=output_path, + resourcefilepath=args.resources_path + ) diff --git a/src/scripts/healthsystem/impact_of_actual_vs_funded/scenario_impact_of_actual_vs_funded.py b/src/scripts/healthsystem/impact_of_actual_vs_funded/scenario_impact_of_actual_vs_funded.py new file mode 100644 index 0000000000..8c5cb7d932 --- /dev/null +++ b/src/scripts/healthsystem/impact_of_actual_vs_funded/scenario_impact_of_actual_vs_funded.py @@ -0,0 +1,117 @@ +"""This Scenario file run the model under different assumptions for HR capabilities expansion in order to estimate the +impact that is achieved under each. + +Run on the batch system using: +``` +tlo batch-submit src/scripts/healthsystem/impact_of_policy/scenario_impact_actual_vs_funded.py +``` + +or locally using: +``` +tlo scenario-run src/scripts/healthsystem/impact_of_policy/scenario_impact_actual_vs_funded.py +``` + +""" +from pathlib import Path +from typing import Dict + +import pandas as pd + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher +from tlo.scenario import BaseScenario + + +class ImpactOfHealthSystemMode(BaseScenario): + def __init__(self): + super().__init__() + self.seed = 0 + self.start_date = Date(2010, 1, 1) + self.end_date = self.start_date + pd.DateOffset(years=31) + self.pop_size = 100_000 + self._scenarios = self._get_scenarios() + self.number_of_draws = len(self._scenarios) + self.runs_per_draw = 10 + + def log_configuration(self): + return { + 'filename': 'effect_of_actual_vs_funded', + 'directory': Path('./outputs'), # <- (specified only for local running) + 'custom_levels': { + '*': logging.WARNING, + 'tlo.methods.demography': logging.INFO, + 'tlo.methods.demography.detail': logging.WARNING, + 'tlo.methods.healthburden': logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + } + } + + def modules(self): + return ( + fullmodel(resourcefilepath=self.resources) + + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + ) + + def draw_parameters(self, draw_number, rng): + if draw_number < self.number_of_draws: + return list(self._scenarios.values())[draw_number] + else: + return + + def _get_scenarios(self) -> Dict[str, Dict]: + """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. + """ + + self.YEAR_OF_CHANGE = 2024 + + return { + + "No growth actual": + mix_scenarios( + self._baseline(), + { + "HealthSystem": { + "use_funded_or_actual_staffing_postSwitch": "actual", + }, + } + ), + + "No growth funded": + mix_scenarios( + self._baseline(), + { + "HealthSystem": { + "use_funded_or_actual_staffing_postSwitch": "funded", + }, + } + ), + + } + + def _baseline(self) -> Dict: + """Return the Dict with values for the parameter changes that define the baseline scenario. """ + return mix_scenarios( + get_parameters_for_status_quo(), + { + "HealthSystem": { + "mode_appt_constraints": 1, # <-- Mode 1 prior to change to preserve calibration + "mode_appt_constraints_postSwitch": 2, # <-- Mode 2 post-change to show effects of HRH + "year_mode_switch": self.YEAR_OF_CHANGE, + "use_funded_or_actual_staffing": 'actual', + "year_use_funded_or_actual_staffing_switch": self.YEAR_OF_CHANGE, + "scale_to_effective_capabilities": False, + "policy_name": "Naive", + "tclose_overwrite": 1, + "tclose_days_offset_overwrite": 7, + "cons_availability": "default", + } + }, + ) + + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/rti_emulator/analysis_extract_data.py b/src/scripts/rti_emulator/analysis_extract_data.py new file mode 100644 index 0000000000..16ba64c492 --- /dev/null +++ b/src/scripts/rti_emulator/analysis_extract_data.py @@ -0,0 +1,569 @@ +"""Produce plots to show the health impact (deaths, dalys) each the healthcare system (overall health impact) when +running under different MODES and POLICIES (scenario_test_rti_emulator)""" + +# short tclose -> ideal case +# long tclose -> status quo +import argparse +from pathlib import Path +from typing import Tuple + +import numpy as np +import pandas as pd +from matplotlib import pyplot as plt +from matplotlib.colors import Normalize +from scipy.optimize import curve_fit + +from tlo import Date +from tlo.analysis.utils import extract_results, summarize +from tlo.analysis.life_expectancy import get_life_expectancy_estimates +from tlo.analysis.utils import ( + CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP, + extract_results, + format_gbd, + get_color_cause_of_death_or_daly_label, + load_pickled_dataframes, + make_age_grp_lookup, + make_age_grp_types, + make_calendar_period_lookup, + make_calendar_period_type, + order_of_cause_of_death_or_daly_label, + plot_clustered_stacked, + summarize, +) +tag = 'new_normal_All' + +#tag = 'new_emulated_with_conditionality_All' +#outputs/test_rti_emulator-2025-02-20T092114Z + +#tag = 'new_normal_All' +#outputs/test_rti_emulator-2025-02-10T095938Z + +#tag = 'new_emulated_with_conditionality_All_x250_other' +#outputs/test_rti_emulator-2025-02-19T153503Z + +#tag = 'new_normal_All_x250_other' +#outputs/test_rti_emulator-2025-02-10T171625Z +""" +""" +#tag = 'normal_Nothing' +#tag = 'normal' + +# Range of years considered +min_year = 2010 +max_year = 2019 + +appt_dict = {'Under5OPD': 'OPD', + 'Over5OPD': 'OPD', + 'AntenatalFirst': 'AntenatalTotal', + 'ANCSubsequent': 'AntenatalTotal', + 'NormalDelivery': 'Delivery', + 'CompDelivery': 'Delivery', + 'EstMedCom': 'EstAdult', + 'EstNonCom': 'EstAdult', + 'VCTPositive': 'VCTTests', + 'VCTNegative': 'VCTTests', + 'DentAccidEmerg': 'DentalAll', + 'DentSurg': 'DentalAll', + 'DentU5': 'DentalAll', + 'DentO5': 'DentalAll', + 'MentOPD': 'MentalAll', + 'MentClinic': 'MentalAll' + } + +def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ): + """Produce standard set of plots describing the effect of each TREATMENT_ID. + - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. + - We estimate the draw on healthcare system resources as the FEWER appointments when that treatment does not occur. + """ + + TARGET_PERIOD = (Date(min_year, 1, 1), Date(max_year, 1, 1)) + + # Definitions of general helper functions + make_graph_file_name = lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 + + def target_period() -> str: + """Returns the target period as a string of the form YYYY-YYYY""" + return "-".join(str(t.year) for t in TARGET_PERIOD) + + def get_parameter_names_from_scenario_file() -> Tuple[str]: + """Get the tuple of names of the scenarios from `Scenario` class used to create the results.""" + from scripts.rti_emulator.scenario_test_rti_emulator import ( + GenerateDataChains, + ) + e = GenerateDataChains() + return tuple(e._scenarios.keys()) + + def get_num_deaths(_df): + """Return total number of Deaths (total within the TARGET_PERIOD) + """ + return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)])) + + def get_num_dalys(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + def get_num_dalys_by_cause(_df): + """Return number of DALYs by cause by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + + def set_param_names_as_column_index_level_0(_df): + """Set the columns index (level 0) as the param_names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] + assert len(names_of_cols_level0) == len(_df.columns.levels[0]) + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df + + def find_difference_relative_to_comparison(_ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. + The comparison is `X - COMPARISON`.""" + return _ser \ + .unstack(level=0) \ + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ + .drop(columns=([comparison] if drop_comparison else [])) \ + .stack() + + + def get_counts_of_hsi_by_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + + year_target = 2023 + def get_counts_of_hsi_by_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).dt.year ==year_target, 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + + def get_counts_of_hsi_by_short_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + def get_counts_of_hsi_by_short_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id_by_year(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + def get_annual_num_appts_by_level(results_folder: Path) -> pd.DataFrame: + """Return pd.DataFrame gives the (mean) simulated annual number of appointments of each type at each level.""" + + def get_counts_of_appts(_df): + """Get the mean number of appointments of each type being used each year at each level. + Need to rename appts to match standardized categories from the DHIS2 data.""" + + def unpack_nested_dict_in_series(_raw: pd.Series): + return pd.concat( + { + idx: pd.DataFrame.from_dict(mydict) for idx, mydict in _raw.items() + } + ).unstack().fillna(0.0).astype(int) + + return _df \ + .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code_And_Level'] \ + .pipe(unpack_nested_dict_in_series) \ + .rename(columns=appt_dict, level=1) \ + .rename(columns={'1b': '2'}, level=0) \ + .groupby(level=[0, 1], axis=1).sum() \ + .mean(axis=0) # mean over each year (row) + + return extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event', + custom_generate_series=get_counts_of_appts, + do_scaling=True + ) + + + def get_annual_num_appts_by_level_with_confidence_interval(results_folder: Path) -> pd.DataFrame: + """Return pd.DataFrame gives the (mean) simulated annual number of appointments of each type at each level, + with 95% confidence interval.""" + #param_names = get_parameter_names_from_scenario_file() + def get_counts_of_appts(_df): + """Get the mean number of appointments of each type being used each year at each level. + Need to rename appts to match standardized categories from the DHIS2 data.""" + + def unpack_nested_dict_in_series(_raw: pd.Series): + return pd.concat( + { + idx: pd.DataFrame.from_dict(mydict) for idx, mydict in _raw.iteritems() + } + ).unstack().fillna(0.0).astype(int) + + return _df \ + .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code_And_Level'] \ + .pipe(unpack_nested_dict_in_series) \ + .rename(columns=appt_dict, level=1) \ + .rename(columns={'1b': '2'}, level=0) \ + .groupby(level=[0, 1], axis=1).sum() \ + .mean(axis=0) # mean over each year (row) + + return summarize( + extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event', + custom_generate_series=get_counts_of_appts, + do_scaling=True + ), + only_mean=False, + collapse_columns=True, + ).unstack().astype(int) + + + model = get_annual_num_appts_by_level(results_folder=results_folder) + model_summarised = summarize(model,only_mean=False,collapse_columns=True).unstack().astype(int) + model.to_csv('ConvertedOutputs/Emulator_Files/Total_Appt_Footprint_' + tag + '.csv', index=True) + model_summarised.to_csv('ConvertedOutputs/Emulator_Files/Total_Appt_Footprint_summarised_' + tag + '.csv', index=True) + #exit(-1) + # Obtain parameter names for this scenario file + param_names = get_parameter_names_from_scenario_file() + print(param_names) + + # ================================================================================================ + # TIME EVOLUTION OF TOTAL DALYs + # Plot DALYs averted compared to the ``No Policy'' policy + + year_target = 2023 # This global variable will be passed to custom function + def get_num_dalys_by_year(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + this_min_year = 2010 + for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'index_original']) + concatenated_df = concatenated_df.reset_index(level='index_original',drop=True) + dalys_by_year = concatenated_df + print(dalys_by_year) + dalys_by_year_summarise = summarize(dalys_by_year).unstack().astype(int) + + dalys_by_year.to_csv('ConvertedOutputs/Emulator_Files/Total_DALYs_with_time_' + tag + '.csv', index=True) + dalys_by_year_summarise.to_csv('ConvertedOutputs/Emulator_Files/Total_DALYs_with_time_summarised_' + tag + '.csv', index=True) + + # ================================================================================================ + # Print population under each scenario + pop_model = extract_results(results_folder, + module="tlo.methods.demography", + key="population", + column="total", + index="date", + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + pop_model.index = pop_model.index.year + pop_model = pop_model[(pop_model.index >= this_min_year) & (pop_model.index <= max_year)] + print(pop_model) + assert dalys_by_year.index.equals(pop_model.index) + assert all(dalys_by_year.columns == pop_model.columns) + pop_model.to_csv('ConvertedOutputs/Emulator_Files/Population_with_time_' + tag + '.csv', index=True) + + # ================================================================================================ + # DEATHSs BROKEN DOWN BY CAUSES + # %% Quantify the health losses associated with all interventions combined. + + def get_num_deaths_by_cause_label(_df): + """Return total number of Deaths by label (total by age-group within the TARGET_PERIOD) + """ + return _df \ + .loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)] \ + .groupby(_df['label']) \ + .size() + + num_deaths_by_cause_label = summarize( + extract_results( + results_folder, + module='tlo.methods.demography', + key='death', + custom_generate_series=get_num_deaths_by_cause_label, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ) + print("Total number of deaths") + print(num_deaths_by_cause_label) + + year_target = 2023 # This global variable will be passed to custom function + def get_num_deaths_by_year_and_cause(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + newTARGET_PERIOD = (Date(year_target, 1, 1), Date(year_target, 12, 31)) + return _df \ + .loc[pd.to_datetime(_df.date).between(*newTARGET_PERIOD)] \ + .groupby(_df['label']) \ + .size() + #.loc[pd.to_datetime(_df.date).dt.year == year_target] \ + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + this_min_year = 2010 + for year in range(this_min_year, max_year+1): + year_target = year + num_deaths_by_year_and_cause = extract_results( + results_folder, + module='tlo.methods.demography', + key='death', + custom_generate_series=get_num_deaths_by_year_and_cause, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_deaths_by_year_and_cause #summarize(num_dalys_by_year) + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + + df_total = concatenated_df + print(df_total) + print(df_total.groupby('cause').cumsum()) + print(summarize(df_total.groupby('cause').sum())) + df_total.to_csv('ConvertedOutputs/Emulator_Files/Deaths_by_cause_with_time_' + tag + '.csv', index=True) + df_total_summarise = summarize(df_total).unstack().astype(int) + df_total_summarise.to_csv('ConvertedOutputs/Emulator_Files/Deaths_by_cause_with_time_summarised_' + tag + '.csv', index=True) + + + # ================================================================================================ + # DALYs BROKEN DOWN BY CAUSES AND YEAR + # DALYs by cause per year + # %% Quantify the health losses associated with all interventions combined. + + year_target = 2023 # This global variable will be passed to custom function + def get_num_dalys_by_year_and_cause(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + this_min_year = 2010 + for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_year_and_cause, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year #summarize(num_dalys_by_year) + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + + df_total = concatenated_df + print(df_total) + df_total.to_csv('ConvertedOutputs/Emulator_Files/DALYS_by_cause_with_time_' + tag + '.csv', index=True) + df_total_summarise = summarize(df_total).unstack().astype(int) + print(df_total_summarise) + df_total_summarise.to_csv('ConvertedOutputs/Emulator_Files/DALYS_by_cause_with_time_summarised_' + tag + '.csv', index=True) + + + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + for year in range(min_year, max_year+1): + year_target = year + + hsi_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_delivered_by_year + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + HSI_ran_by_year = concatenated_df + + del ALL + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + for year in range(min_year, max_year+1): + year_target = year + + hsi_not_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Never_ran_HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_not_delivered_by_year + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + HSI_never_ran_by_year = concatenated_df + + HSI_never_ran_by_year = HSI_never_ran_by_year.fillna(0) #clean_df( + HSI_ran_by_year = HSI_ran_by_year.fillna(0) + HSI_total_by_year = HSI_ran_by_year.add(HSI_never_ran_by_year, fill_value=0) + HSI_ran_by_year.to_csv('ConvertedOutputs/Emulator_Files/HSIs_ran_by_area_with_time_' + tag + '.csv', index=True) + HSI_never_ran_by_year.to_csv('ConvertedOutputs/Emulator_Files/HSIs_never_ran_by_area_with_time_' + tag + '.csv', index=True) + print(HSI_ran_by_year) + print(HSI_never_ran_by_year) + print(HSI_total_by_year) + + def get_dalys_by_period_sex_agegrp_label(df): + """Sum the dalys by period, sex, age-group and label""" + _, calperiodlookup = make_calendar_period_lookup() + + df['age_grp'] = df['age_range'].astype(make_age_grp_types()) + df["period"] = df["year"].map(calperiodlookup).astype(make_calendar_period_type()) + df = df.drop(columns=['date', 'age_range', 'year']) + df = df.groupby(by=["period", "sex", "age_grp"]).sum().stack() + df.index = df.index.set_names('label', level=3) + return df + + results = extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_stacked_by_age_and_time", # <-- for DALYS stacked by age and time + custom_generate_series=get_dalys_by_period_sex_agegrp_label, + do_scaling=True + ) + + # divide by five to give the average number of deaths per year within the five year period: + results = results.div(5.0) + results_to_store = (results.loc['2010-2014'] + results.loc['2015-2019'])/2 + results_to_store.to_csv('ConvertedOutputs/Emulator_Files/DALYs_by_sex_age_' + tag + '.csv', index=True) + results_to_store = summarize(results_to_store) + results_to_store.to_csv('ConvertedOutputs/Emulator_Files/DALYs_by_sex_age_summarised_' + tag + '.csv', index=True) + + + def get_total_num_dalys_by_wealth_and_label(_df): + """Return the total number of DALYS in the TARGET_PERIOD by wealth and cause label.""" + wealth_cats = {5: '0-19%', 4: '20-39%', 3: '40-59%', 2: '60-79%', 1: '80-100%'} + + return _df \ + .loc[_df['year'].between(*[d.year for d in TARGET_PERIOD])] \ + .drop(columns=['date', 'year']) \ + .assign( + li_wealth=lambda x: x['li_wealth'].map(wealth_cats).astype( + pd.CategoricalDtype(wealth_cats.values(), ordered=True) + ) + ).melt(id_vars=['li_wealth'], var_name='label') \ + .groupby(by=['li_wealth', 'label'])['value'] \ + .sum() + + total_num_dalys_by_wealth_and_label = extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_by_wealth_stacked_by_age_and_time", + custom_generate_series=get_total_num_dalys_by_wealth_and_label, + do_scaling=True, + ) + + total_num_dalys_by_wealth_and_label.to_csv('ConvertedOutputs/Emulator_Files/DALYs_by_wealth_' + tag + '.csv', index=True) + + total_num_dalys_by_wealth_and_label = summarize(total_num_dalys_by_wealth_and_label, + collapse_columns=True, + only_mean=False, + ).unstack() + + total_num_dalys_by_wealth_and_label.to_csv('ConvertedOutputs/Emulator_Files/DALYs_by_wealth_summarised_' + tag + '.csv', index=True) + + + + +if __name__ == "__main__": + rfp = Path('resources') + + parser = argparse.ArgumentParser( + description="Produce plots to show the impact each set of treatments", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--output-path", + help=( + "Directory to write outputs to. If not specified (set to None) outputs " + "will be written to value of --results-path argument." + ), + type=Path, + default=None, + required=False, + ) + parser.add_argument( + "--resources-path", + help="Directory containing resource files", + type=Path, + default=Path('resources'), + required=False, + ) + parser.add_argument( + "--results-path", + type=Path, + help=( + "Directory containing results from running " + "src/scripts/rti_emulator/scenario_test_rti_emulator " + ), + default=None, + required=False + ) + args = parser.parse_args() + assert args.results_path is not None + results_path = args.results_path + + output_path = results_path if args.output_path is None else args.output_path + + apply( + results_folder=results_path, + output_folder=output_path, + resourcefilepath=args.resources_path + ) diff --git a/src/scripts/rti_emulator/final_analysis_extract_data.py b/src/scripts/rti_emulator/final_analysis_extract_data.py new file mode 100644 index 0000000000..ca702699a4 --- /dev/null +++ b/src/scripts/rti_emulator/final_analysis_extract_data.py @@ -0,0 +1,826 @@ +"""Produce plots to show the health impact (deaths, dalys) each the healthcare system (overall health impact) when +running under different MODES and POLICIES (scenario_impact_of_capabilities_expansion_scaling.py)""" + +# short tclose -> ideal case +# long tclose -> status quo +import argparse +from pathlib import Path +from typing import Tuple + +import pandas as pd +from tlo import Date +from tlo.analysis.utils import extract_results, make_calendar_period_type, make_calendar_period_lookup, make_age_grp_lookup, make_age_grp_types +import matplotlib.pyplot as plt + +# Archive of outputs +# Batch based on /Users/mm2908/Desktop/EmuIBM/Save_With_WellPerforming/emulators/latest_CTGANSynthesizer_epochs500_dsF_batch_size500_num_k_folds10_Nsubsample10000_InAndOutC_test_k_folding_UniformEncoder_CTGANtest3_repeat_seed42_k_fold0.pkl +# 'standard_RTI': 'outputs/test_rti_emulator-2025-08-12T205454Z' +# 'emulated_RTI': 'outputs/test_rti_emulator-2025-08-13T080302Z' +# 'standard_RTI_x250': 'outputs/test_rti_emulator-2025-09-06T055808Z' +# 'emulated_RTI_x250': 'outputs/test_rti_emulator-2025-09-05T085159Z' +# 'no_RTI': 'outputs/test_rti_emulator-2025-08-13T143342Z' + + +# Latest CTGAN runs + number of runs per scenario increased from 10 -> 50 +outputs = { + 'standard_RTI': {'results_folder' : Path('outputs/test_rti_emulator-2025-08-12T205454Z'), 'data': {}}, + 'emulated_RTI': {'results_folder' : Path('outputs/test_rti_emulator-2025-08-13T080302Z'), 'data' : {}}, + 'standard_RTI_x250': {'results_folder' : Path('outputs/test_rti_emulator-2025-09-06T055808Z'), 'data': {}}, + 'emulated_RTI_x250': {'results_folder' : Path('outputs/test_rti_emulator-2025-09-05T085159Z'), 'data' : {}}, + 'no_RTI': {'results_folder' : Path('outputs/test_rti_emulator-2025-08-13T143342Z'), 'data' : {}}, + } + + +def apply(results_folder: Path) -> Tuple: + + tag = results_folder.name + + def get_parameter_names_from_scenario_file() -> Tuple[str]: + """Get the tuple of names of the scenarios from `Scenario` class used to create the results.""" + from scripts.healthsystem.impact_of_const_capabilities_expansion.scenario_impact_of_capabilities_expansion_scaling import ( + ImpactOfHealthSystemMode, + ) + e = ImpactOfHealthSystemMode() + return tuple(e._scenarios.keys()) + + def set_param_names_as_column_index_level_0(_df): + """Set the columns index (level 0) as the param_names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] + assert len(names_of_cols_level0) == len(_df.columns.levels[0]) + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df + + def get_counts_of_death_by_year(df): + df["year"] = df["date"].dt.year + result = df.groupby(by=["year", "label"])["person_id"].count() + return result + + def get_num_dalys_by_year(_df): + drop_cols = ["date", "sex", "age_range"] + result = _df.drop(columns=drop_cols).groupby("year", as_index=True).sum().stack() + result.index.set_names(["year", "label"], inplace=True) + return result + + def get_dalys_by_period_sex_agegrp_label(df): + """Sum the dalys by period, sex, age-group and label""" + _, calperiodlookup = make_calendar_period_lookup() + + df['age_grp'] = df['age_range'].astype(make_age_grp_types()) + df["period"] = df["year"].map(calperiodlookup).astype(make_calendar_period_type()) + df = df.drop(columns=['date', 'age_range', 'year']) + df = df.groupby(by=["period", "sex", "age_grp"]).sum().stack() + df.index = df.index.set_names('label', level=3) + return df + + def rename_index_levels(df): + rename_dict = {} + for level in df.index.names: + if level == 'date': + rename_dict['date'] = 'year' + elif level == 'label': + rename_dict['label'] = 'cause' + + if rename_dict: + df = df.rename_axis(index=rename_dict) + + if "year" in df.index.names: + years = df.index.get_level_values("year") + mask = (years >= 2010) & (years < 2020) + df = df[mask] + return df + + param_names = get_parameter_names_from_scenario_file() + + num_deaths_by_year_and_cause = rename_index_levels(extract_results( + results_folder, + module="tlo.methods.demography", + key="death", + custom_generate_series=get_counts_of_death_by_year, + do_scaling=True + )) + + num_dalys_by_year_and_cause = rename_index_levels(extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_stacked", + custom_generate_series=get_num_dalys_by_year, + do_scaling=True + )) + + num_individuals = extract_results(results_folder, + module="tlo.methods.demography", + key="population", + column="total", + index="date", + do_scaling=True + ) + num_individuals.index = num_individuals.index.year + num_individuals = rename_index_levels(num_individuals) + + cfr = num_deaths_by_year_and_cause.div(num_individuals, level="year") + + dalys_by_sex_and_age_time = extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_stacked_by_age_and_time", # <-- for DALYS stacked by age and time + custom_generate_series=get_dalys_by_period_sex_agegrp_label, + do_scaling=True + ) + # divide by five to give the average number of deaths per year within the five year period: + dalys_by_sex_and_age_time = dalys_by_sex_and_age_time.div(5.0) + dalys_by_sex_and_age = (dalys_by_sex_and_age_time.loc['2010-2014'] + dalys_by_sex_and_age_time.loc['2015-2019'])/2 + + # Collect all data for this output + data = {'deaths' : num_deaths_by_year_and_cause, 'dalys' : num_dalys_by_year_and_cause, 'pop' : num_individuals, 'cfr' : cfr, 'dalys_by_sex_and_age' : dalys_by_sex_and_age} + + return data + + +# Extract and calculate summary statistics +def compute_summary_stats(df): + df_mean = df.mean(axis=1) + df[('0','mean')] = df_mean + df_lower = df.quantile((1-0.682)/2.0,axis=1) + df_upper = df.quantile((1-0.682)/2.0+0.682,axis=1) + df[('0','mean')] = df_mean + df[('0','lower')] = df_lower + df[('0','upper')] = df_upper + return df + + +def compare_and_make_plots_age_sex_distr(outputs, first_scenario, second_scenario, third_scenario, fourth_scenario, target, compare_to_other_x250=True): + # Load from outputs + df_map = { + "normal": outputs[first_scenario]['data'][target], + "emulated": outputs[second_scenario]['data'][target], + "normal_x250": outputs[third_scenario]['data'][target], + "emulated_x250": outputs[fourth_scenario]['data'][target], + } + labels = [first_scenario, second_scenario, third_scenario, fourth_scenario] + + # Focus cause + cause = 'Transport Injuries' + for k, df in df_map.items(): + df_map[k] = compute_summary_stats(df.loc[(slice(None), slice(None), cause)]) + + # Age group mapping + age_grps = df_map["normal"].index.get_level_values('age_grp').unique() + age_grp_mapping = {age: i for i, age in enumerate(age_grps)} + + # Init p-values + pvalues = pd.DataFrame(columns=['p-value ks', 'p-value t', 'p-value w'], + index=pd.MultiIndex.from_tuples([], names=['case', 'sex', 'age_grp'])) + """ + # ---- Helper: compute p-values + def run_tests(case, df1, df2): + for sex in ['F', 'M']: + for age in age_grps: + data1, data2 = df1.loc[(sex, age)].values, df2.loc[(sex, age)].values + p_ks = ks_2samp(data1, data2).pvalue + p_t = stats.ttest_ind(data1, data2).pvalue + p_w = stats.wilcoxon(data1, data2).pvalue + pvalues.loc[(case, sex, age), :] = [p_ks, p_t, p_w] + + run_tests("other_x1", df_map["normal"], df_map["emulated"]) + if compare_to_other_x250: + run_tests("other_x250", df_map["normal_x250"], df_map["emulated_x250"]) + """ + + # ---- Plot + fig, axes = plt.subplots(1, 2, figsize=(14, 7), sharey=True) + for ax in axes: ax.set_ylim(-10, 120000) + + for i, sex in enumerate(['F', 'M']): + x_pos = [age_grp_mapping[a] for a in age_grps] + + def plot_band_and_mean(df, color, marker, label, alpha_fill, alpha_line, ls): + mean = df.loc[(sex, slice(None)), ('0', 'mean')].values + low = df.loc[(sex, slice(None)), ('0', 'lower')].values + up = df.loc[(sex, slice(None)), ('0', 'upper')].values + axes[i].fill_between(x_pos, low, up, color=color, alpha=alpha_fill) + axes[i].plot(x_pos, mean, marker=marker, color=color, alpha=alpha_line, linestyle=ls, label=label) + return mean + + # Plot main (normal vs emulated) + ls, fill, line = ('--', 0.1, 0.4) if compare_to_other_x250 else ('-', 0.3, 1.0) + m_norm = plot_band_and_mean(df_map["normal"], "blue", "o", labels[0] if i==0 else None, fill, line, ls) + m_emul = plot_band_and_mean(df_map["emulated"], "orange", "s", labels[1] if i==0 else None, fill, line, ls) + + # Optional comparison to x250 + if compare_to_other_x250: + m_norm_x250 = plot_band_and_mean(df_map["normal_x250"], "blue", "o", labels[2] if i==0 else None, 0.3, 1.0, '-') + m_emul_x250 = plot_band_and_mean(df_map["emulated_x250"], "orange", "s", labels[3] if i==0 else None, 0.3, 1.0, '-') + + """ + # Annotate KS values + y_base = 42000 if sex=="M" else 30000 + for case, offset in [("other_x1", 7000), ("other_x250", 0)] if compare_to_other_x250 else [("other_x1", 7000)]: + for age in age_grps: + axes[i].text(age_grp_mapping[age], y_base+offset, + f"{pvalues.loc[(case, sex, age), 'p-value ks']:.2f}", + fontsize=8, ha="center", rotation=90) + axes[i].text(age_grp_mapping['0-4'], y_base+offset+3000, + "KS test p-value" + (" (incr. other mortality)" if case=="other_x250" else ""), + fontsize=8, ha="left") + """ + # Formatting + axes[i].set_title(sex) + axes[i].set_xlabel("Age Group") + axes[i].set_ylabel("Averaged Yearly DALYs") + axes[i].set_xticks(x_pos) + axes[i].set_xticklabels(age_grps, rotation=45) + axes[i].legend() + + plt.tight_layout() + fname = "plots/final_DALYs_Breakdown_by_age_grp_DC.png" if compare_to_other_x250 else "plots/final_DALYs_Breakdown_by_age_grp.png" + plt.savefig(fname) + plt.close() + +def compare_old(outputs, first_scenario, second_scenario, third_scenario, fourth_scenario, target): + + # Load output files, which include standard other mortality and other mortality rates increased by x250 + df_af_normal = pd.read_csv('DALYs_by_sex_age_' + first_data_set + '.csv', header=[0, 1], index_col=[0,1,2]) + df_af_emulated = pd.read_csv('DALYs_by_sex_age_' + second_data_set + '.csv', header=[0, 1], index_col=[0,1,2]) + df_af_normal_other_x250 = pd.read_csv('DALYs_by_sex_age_' + first_data_set + '_x250_other.csv', header=[0, 1], index_col=[0,1,2]) + df_af_emulated_other_x250 = pd.read_csv('DALYs_by_sex_age_'+ second_data_set + '_x250_other.csv', header=[0, 1], index_col=[0,1,2]) + + # This is the cause that will be considered in the plots. Could also make loop over this + cause = 'Transport Injuries' + df_normal = df_af_normal.loc[(slice(None), slice(None), cause)] + df_normal_other_x250 = df_af_normal_other_x250.loc[(slice(None), slice(None), cause)] + df_emulated = df_af_emulated.loc[(slice(None), slice(None), cause)] + df_emulated_other_x250 = df_af_emulated_other_x250.loc[(slice(None), slice(None), cause)] + + # Map 'age_grp' to numeric values for easier plotting + age_grp_mapping = {age: i for i, age in enumerate(df_normal.index.get_level_values('age_grp').unique())} + + df_normal = compute_summary_stats(df_normal) + df_emulated = compute_summary_stats(df_emulated) + df_normal_other_x250 = compute_summary_stats(df_normal_other_x250) + df_emulated_other_x250 = compute_summary_stats(df_emulated_other_x250) + + # Store p-values for all x-values + index = pd.MultiIndex.from_tuples([], names=['case', 'sex', 'age_grp']) + columns = ['p-value ks', 'p-value t', 'p-value w'] + pvalues = pd.DataFrame(index=index, columns=columns) + + for sex in ['F','M']: + for age_group in df_normal_other_x250.index.get_level_values('age_grp').unique(): + + # Compute statistical tests for x1 other mortality + data_normal = df_normal_other_x250.loc[(sex,age_group)].values + data_emulat = df_emulated_other_x250.loc[(sex,age_group)].values + + statistic, p_value_ks = ks_2samp(data_normal, data_emulat) + t_stat, p_value_t = stats.ttest_ind(data_normal, data_emulat) + res_w = stats.wilcoxon(data_normal,data_emulat) + p_value_w = res_w.pvalue + pvalues.loc[('other_x250',sex,age_group),:] = [p_value_ks, p_value_t, p_value_w] + + # Compute statistical tests for x250 other mortality + data_normal = df_normal.loc[(sex,age_group)].values + data_emulat = df_emulated.loc[(sex,age_group)].values + + statistic, p_value_ks = ks_2samp(data_normal, data_emulat) + t_stat, p_value_t = stats.ttest_ind(data_normal, data_emulat) + res_w = stats.wilcoxon(data_normal,data_emulat) + p_value_w = res_w.pvalue + pvalues.loc[('other_x1',sex,age_group),:] = [p_value_ks, p_value_t, p_value_w] + + + # Create a figure for the subplots (one for each sex) + fig, axes = plt.subplots(1, 2, figsize=(14, 7), sharey=True) + for ax in axes: + ax.set_ylim(-1000, 55000) + + # Plot for each sex + for sex in ['F','M']: + if sex == 'F': + i = 0 + else: + i = 1 + + # Extract the age groups from the dataframe + age_groups = df_normal.index.get_level_values('age_grp').unique() + + # Create a numeric x_pos based on age groups for easier plotting + x_pos = [age_grp_mapping[age] for age in age_groups] + + # Prepare the values for plotting + normal_means = df_normal.loc[(sex, slice(None)),('0','mean')].values + emulated_means = df_emulated.loc[(sex, slice(None)),('0','mean')].values + normal_lower = df_normal.loc[(sex, slice(None)),('0','lower')].values + normal_upper = df_normal.loc[(sex, slice(None)),('0','upper')].values + emulated_lower = df_emulated.loc[(sex, slice(None)),('0','lower')].values + emulated_upper = df_emulated.loc[(sex, slice(None)),('0','upper')].values + + if compare_to_other_x250: + normal_means_other_x250 = df_normal_other_x250.loc[(sex, slice(None)),('0','mean')].values + emulated_means_other_x250 = df_emulated_other_x250.loc[(sex, slice(None)),('0','mean')].values + normal_lower_other_x250 = df_normal_other_x250.loc[(sex, slice(None)),('0','lower')].values + normal_upper_other_x250 = df_normal_other_x250.loc[(sex, slice(None)),('0','upper')].values + emulated_lower_other_x250 = df_emulated_other_x250.loc[(sex, slice(None)),('0','lower')].values + emulated_upper_other_x250 = df_emulated_other_x250.loc[(sex, slice(None)),('0','upper')].values + fill_main_comparison = 0.1 + line_main_compariosn = 0.4 + main_linestyle = '--' + print(main_linestyle) + else: + fill_main_comparison = 0.3 + line_main_compariosn = 1.0 + main_linestyle = '-' + + # Calculate % error: ((emulated - original) / original) * 100 + percent_error = 100 * (emulated_means - normal_means) / normal_means + if compare_to_other_x250: + percent_error_other_x250 = 100 * (emulated_means_other_x250 - normal_means_other_x250) / normal_means_other_x250 + + # Plot the shaded area for normal data (F or M) + axes[i].fill_between(x_pos, normal_lower, normal_upper, + color='blue', alpha=fill_main_comparison) + if compare_to_other_x250: + axes[i].fill_between(x_pos, normal_lower_other_x250, normal_upper_other_x250, + color='blue', alpha=0.3) + + # Plot the shaded area for emulated data (F or M) + axes[i].fill_between(x_pos, emulated_lower, emulated_upper, + color='orange', alpha=fill_main_comparison) + if compare_to_other_x250: + axes[i].fill_between(x_pos, emulated_lower_other_x250, emulated_upper_other_x250, + color='orange', alpha=0.3) + + # Plot the mean values for normal and emulated data + axes[i].plot(x_pos, normal_means, 'o-', color='blue', alpha=line_main_compariosn, linestyle=main_linestyle, label=labels[first_data_set] if i == 0 else "") # Normal mean + axes[i].plot(x_pos, emulated_means, 's-', color='orange', alpha=line_main_compariosn, linestyle=main_linestyle, label=labels[second_data_set] if i == 0 else "") # Emulated mean + if compare_to_other_x250: + axes[i].plot(x_pos, normal_means_other_x250, 'o-', color='blue', label=f'Original (incr. other mortality)' if i == 0 else "") # Normal mean + axes[i].plot(x_pos, emulated_means_other_x250, 's-', color='orange', label=f'Emulated (incr. other mortality)' if i == 0 else "") # Emulated mean + + # Include KS test p-values in the plot + if sex == 'M': + ks_yloc_other_x250 = 42000 + else: + ks_yloc_other_x250 = 30000 + ks_yloc_other_x1 = ks_yloc_other_x250 + 7000 + + for age_grp in df_normal.index.get_level_values('age_grp').unique(): + ks_value = pvalues.loc[('other_x1',sex, age_grp), 'p-value ks'] + axes[i].text( + x=age_grp_mapping[age_grp], + y=ks_yloc_other_x1, + s=f"{ks_value:.2f}", + fontsize=8, + ha='center', + rotation=90, + #bbox=dict(boxstyle="round,pad=0.1", facecolor="white", alpha=0.7) + ) + + #ks_value = pvalues.loc[(date, cause), 'p-value ks'] + leg_ypos = ks_yloc_other_x1+3000 + axes[i].text( + x=age_grp_mapping['0-4'], + y=leg_ypos, + s=f"KS test p-value", + fontsize=8, + ha='left', + rotation=0, + #bbox=dict(boxstyle="round,pad=0.2", facecolor="white", alpha=0.7) + ) + if compare_to_other_x250: + for age_grp in df_normal.index.get_level_values('age_grp').unique(): + ks_value = pvalues.loc[('other_x250',sex, age_grp), 'p-value ks'] + axes[i].text( + x=age_grp_mapping[age_grp], + y=ks_yloc_other_x250, + s=f"{ks_value:.2f}", + fontsize=8, + ha='center', + rotation=90, + #bbox=dict(boxstyle="round,pad=0.2", facecolor="white", alpha=0.7) + ) + + leg_ypos = ks_yloc_other_x250+3000 + axes[i].text( + x=age_grp_mapping['0-4'], + y=leg_ypos, + s=f"KS test p-value (incr. other mortality)", + fontsize=8, + ha='left', + rotation=0, + #bbox=dict(boxstyle="round,pad=0.2", facecolor="white", alpha=0.7) + ) + + # Set titles and labels + axes[i].set_title(f'{sex}') + axes[i].set_xlabel('Age Group') + axes[i].set_ylabel('Averaged Yearly DALYs') + + # Set x-ticks and labels + axes[i].set_xticks(x_pos) + axes[i].set_xticklabels(age_groups, rotation=45) + + # Add a legend + axes[i].legend() + + # Adjust layout for better spacing + plt.tight_layout() + + # Show the plot + if compare_to_other_x250: + plt.savefig('plots/final_DALYs_Breakdown_by_age_grp_DC.png') + else: + plt.savefig('plots/final_DALYs_Breakdown_by_age_grp.png') + + +def compare_and_plot(outputs, first_scenario, second_scenario, target, factor=None, ylabel=None): + + if factor is None: + df_first = outputs[first_scenario]['data'][target] # formally df + df_second = outputs[second_scenario]['data'][target] # formally df_emulated + else: + df_first = outputs[first_scenario]['data'][target]*factor # formally df + df_second = outputs[second_scenario]['data'][target]*factor # formally df_emulated + + # Define label names + labels = [first_scenario, second_scenario] + + # Iterate over causes + causes = df_first.index.get_level_values('cause').unique() + for cause in causes: + # Extract relevant data for both scenarios + data = {} + for label_name, dataset in zip(labels, [df_first, df_second]): + df_cause = dataset.loc[dataset.index.get_level_values('cause') == cause, ('0', ['lower', 'mean', 'upper'])] + data[label_name] = { + 'lower': df_cause[('0', 'lower')], + 'mean': df_cause[('0', 'mean')], + 'upper': df_cause[('0', 'upper')] + } + + years = data[labels[0]]['mean'].index.get_level_values('year') + + # % error + percent_error = 100 * (data[labels[1]]['mean'].values - data[labels[0]]['mean'].values) / data[labels[0]]['mean'].values + + # Create figure with two panels + fig, (ax_main, ax_error) = plt.subplots(2, 1, figsize=(10, 8), + gridspec_kw={'height_ratios': [3, 1]}, sharex=True) + + colors = ['blue', 'orange'] + markers = ['o', 'x'] + for i, label_name in enumerate(labels): + ax_main.plot(years, data[label_name]['mean'], label=label_name, color=colors[i], linestyle='-', marker=markers[i]) + ax_main.fill_between(years, data[label_name]['lower'], data[label_name]['upper'], color=colors[i], alpha=0.2) + + if ylabel is None: + ax_main.set_ylabel(f'{target}') + else: + ax_main.set_ylabel(f'{ylabel}') + ax_main.set_title(f'{target} due to {cause}') + ax_main.legend(loc='lower right') + ax_main.grid(True) + ax_main.set_ylim(0, 1.2e6) + + + # % error panel + print("For cause ", cause, " and target ", target, " MEAN PERC ERROR +++++++", percent_error.mean()) + ax_error.plot(years, percent_error, color='red', marker='o', linestyle='-', label='Mean') + ax_error.axhline(0, color='black', linestyle='--', linewidth=0.8) + ax_error.set_ylabel('% Error') + ax_error.set_xlabel('Year') + ax_error.legend(loc='upper right') + ax_error.grid(True) + + plt.tight_layout() + plt.savefig(f'plots/final_{target}_due_to_{cause}.png') + plt.close() + + +# First collect all data +for key in outputs.keys(): + outputs[key]['data'] = apply(outputs[key]['results_folder']) + for data_type in outputs[key]['data'].keys(): + outputs[key]['data'][data_type] = compute_summary_stats(outputs[key]['data'][data_type]) + + +compare_and_plot(outputs, 'standard_RTI', 'emulated_RTI', 'deaths', None, 'Deaths') +compare_and_plot(outputs, 'standard_RTI', 'emulated_RTI', 'dalys', None, 'DALYs') +compare_and_plot(outputs, 'standard_RTI', 'emulated_RTI', 'cfr', 1000, 'Crude mortality rate (/year/1000 individuals)') +compare_and_make_plots_age_sex_distr(outputs, 'standard_RTI', 'emulated_RTI', 'standard_RTI_x250', 'emulated_RTI_x250', 'dalys_by_sex_and_age', compare_to_other_x250=True) + + +exit(-1) + + + + +TARGET_PERIOD = (Date(min_year, 1, 1), Date(max_year, 1, 1)) + +# Definitions of general helper functions +lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 + +def target_period() -> str: + """Returns the target period as a string of the form YYYY-YYYY""" + return "-".join(str(t.year) for t in TARGET_PERIOD) + + + +def get_num_deaths(_df): + """Return total number of Deaths (total within the TARGET_PERIOD) + """ + return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)])) + +def get_num_dalys(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + +def get_num_dalys_by_cause(_df): + """Return number of DALYs by cause by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + +def set_param_names_as_column_index_level_0(_df): + """Set the columns index (level 0) as the param_names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] + assert len(names_of_cols_level0) == len(_df.columns.levels[0]) + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df + +def find_difference_relative_to_comparison(_ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. + The comparison is `X - COMPARISON`.""" + return _ser \ + .unstack(level=0) \ + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ + .drop(columns=([comparison] if drop_comparison else [])) \ + .stack() + + +def get_counts_of_hsi_by_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + +year_target = 2023 +def get_counts_of_hsi_by_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).dt.year ==year_target, 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + +def get_counts_of_hsi_by_short_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + +def get_counts_of_hsi_by_short_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id_by_year(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + +# Obtain parameter names for this scenario file +param_names = get_parameter_names_from_scenario_file() +print(param_names) + + +def get_counts_of_death_by_year(df): + """Aggregate the model outputs into five-year periods for age and time""" + #_, agegrplookup = make_age_grp_lookup() + #_, calperiodlookup = make_calendar_period_lookup() + df["year"] = df["date"].dt.year + #df["age_grp"] = df["age"].map(agegrplookup).astype(make_age_grp_types()) + #df["period"] = df["year"].map(calperiodlookup).astype(make_calendar_period_type()) + return df.groupby(by=["year", "label"])["person_id"].count() + + +deaths_by_year_and_cause = extract_results( + results_folder, + module="tlo.methods.demography", + key="death", + custom_generate_series=get_counts_of_death_by_year, + do_scaling=True +).pipe(set_param_names_as_column_index_level_0) + + +print(deaths_by_year_and_cause) +print(list(deaths_by_year_and_cause.index)) +deaths_by_year_and_cause.to_csv('ConvertedOutputs/Deaths_by_cause_with_time.csv', index=True) +exit(-1) + +# ================================================================================================ +# TIME EVOLUTION OF TOTAL DALYs +# Plot DALYs averted compared to the ``No Policy'' policy + +year_target = 2023 # This global variable will be passed to custom function +def get_num_dalys_in_year(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + +ALL = {} +# Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred +# are consistent across different policies +this_min_year = 2010 +for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_in_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year + print(num_dalys_by_year.columns) +# Concatenate the DataFrames into a single DataFrame +concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) +concatenated_df.index = concatenated_df.index.set_names(['date', 'index_original']) +concatenated_df = concatenated_df.reset_index(level='index_original',drop=True) +dalys_by_year = concatenated_df +print(dalys_by_year) +dalys_by_year.to_csv('ConvertedOutputs/Total_DALYs_with_time.csv', index=True) + +# ================================================================================================ +# Print population under each scenario +pop_model = extract_results(results_folder, + module="tlo.methods.demography", + key="population", + column="total", + index="date", + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + +pop_model.index = pop_model.index.year +pop_model = pop_model[(pop_model.index >= this_min_year) & (pop_model.index <= max_year)] +print(pop_model) +assert dalys_by_year.index.equals(pop_model.index) +assert all(dalys_by_year.columns == pop_model.columns) +pop_model.to_csv('ConvertedOutputs/Population_with_time.csv', index=True) + +# ================================================================================================ +# DALYs BROKEN DOWN BY CAUSES AND YEAR +# DALYs by cause per year +# %% Quantify the health losses associated with all interventions combined. + +year_target = 2023 # This global variable will be passed to custom function +def get_num_dalys_by_year_and_cause(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + +ALL = {} +# Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred +# are consistent across different policies +this_min_year = 2010 +for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_year_and_cause, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year #summarize(num_dalys_by_year) + +# Concatenate the DataFrames into a single DataFrame +concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + +concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + +df_total = concatenated_df +df_total.to_csv('ConvertedOutputs/DALYS_by_cause_with_time.csv', index=True) + +ALL = {} +# Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred +# are consistent across different policies +for year in range(min_year, max_year+1): + year_target = year + + hsi_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_delivered_by_year + +# Concatenate the DataFrames into a single DataFrame +concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) +concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) +HSI_ran_by_year = concatenated_df + +del ALL + +ALL = {} +# Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred +# are consistent across different policies +for year in range(min_year, max_year+1): + year_target = year + + hsi_not_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Never_ran_HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_not_delivered_by_year + +# Concatenate the DataFrames into a single DataFrame +concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) +concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) +HSI_never_ran_by_year = concatenated_df + +HSI_never_ran_by_year = HSI_never_ran_by_year.fillna(0) #clean_df( +HSI_ran_by_year = HSI_ran_by_year.fillna(0) +HSI_total_by_year = HSI_ran_by_year.add(HSI_never_ran_by_year, fill_value=0) +HSI_ran_by_year.to_csv('ConvertedOutputs/HSIs_ran_by_area_with_time.csv', index=True) +HSI_never_ran_by_year.to_csv('ConvertedOutputs/HSIs_never_ran_by_area_with_time.csv', index=True) +print(HSI_ran_by_year) +print(HSI_never_ran_by_year) +print(HSI_total_by_year) +exit(-1) + +""" +if __name__ == "__main__": + rfp = Path('resources') + + parser = argparse.ArgumentParser( + description="Produce plots to show the impact each set of treatments", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--output-path", + help=( + "Directory to write outputs to. If not specified (set to None) outputs " + "will be written to value of --results-path argument." + ), + type=Path, + default=None, + required=False, + ) + parser.add_argument( + "--resources-path", + help="Directory containing resource files", + type=Path, + default=Path('resources'), + required=False, + ) + parser.add_argument( + "--results-path", + type=Path, + help=( + "Directory containing results from running " + "src/scripts/healthsystem/impact_of_const_capabilities_expansion/scenario_impact_of_capabilities_expansion_scaling.py " + ), + default=None, + required=False + ) + args = parser.parse_args() + assert args.results_path is not None + results_path = args.results_path + + output_path = results_path if args.output_path is None else args.output_path + + apply( + results_folder=results_path, + output_folder=output_path, + resourcefilepath=args.resources_path + ) +""" diff --git a/src/scripts/rti_emulator/scenario_test_rti_emulator.py b/src/scripts/rti_emulator/scenario_test_rti_emulator.py new file mode 100644 index 0000000000..1fd15e8641 --- /dev/null +++ b/src/scripts/rti_emulator/scenario_test_rti_emulator.py @@ -0,0 +1,145 @@ +"""This Scenario file run the model to test the use of the RTI emulator + +Run on the batch system using: +``` +tlo batch-submit + src/scripts/analysis_data_generation/scenario_test_rti_emulator.py +``` + +or locally using: +``` + tlo scenario-run src/scripts/rti_emulator/scenario_test_rti_emulator.py +``` + +""" +from pathlib import Path +from typing import Dict + +import pandas as pd + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher +from tlo.scenario import BaseScenario +from tlo.methods import ( + alri, + cardio_metabolic_disorders, + care_of_women_during_pregnancy, + simplified_births, + contraception, + demography, + depression, + diarrhoea, + enhanced_lifestyle, + epi, + healthburden, + healthseekingbehaviour, + healthsystem, + hiv, + rti, + labour, + malaria, + newborn_outcomes, + postnatal_supervisor, + pregnancy_supervisor, + stunting, + symptommanager, + tb, + wasting, +) + +class GenerateDataChains(BaseScenario): + def __init__(self): + super().__init__() + self.seed = 0 + self.start_date = Date(2010, 1, 1) + self.end_date = self.start_date + pd.DateOffset(years=10) + self.pop_size = 50_000 + self._scenarios = self._get_scenarios() + self.number_of_draws = len(self._scenarios) + self.runs_per_draw = 50 + + def log_configuration(self): + return { + 'filename': 'test_rti_emulator', + 'directory': Path('./outputs'), # <- (specified only for local running) + 'custom_levels': { + '*': logging.WARNING, + 'tlo.methods.demography': logging.INFO, + 'tlo.methods.events': logging.INFO, + 'tlo.methods.demography.detail': logging.WARNING, + 'tlo.methods.healthburden': logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + } + } + + def modules(self): + # MODIFY + # Here instead of running full module + return [demography.Demography(resourcefilepath=self.resources), + enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources), + healthburden.HealthBurden(resourcefilepath=self.resources), + symptommanager.SymptomManager(resourcefilepath=self.resources, spurious_symptoms=False), + rti.RTI(resourcefilepath=self.resources), + healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources), + simplified_births.SimplifiedBirths(resourcefilepath=self.resources), + healthsystem.HealthSystem(resourcefilepath=self.resources, + mode_appt_constraints=1, + cons_availability='all')] + + # return ( + # fullmodel(resourcefilepath=self.resources) + # + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + # ) + + def draw_parameters(self, draw_number, rng): + if draw_number < self.number_of_draws: + return list(self._scenarios.values())[draw_number] + else: + return + + # case 1: gfHE = -0.030, factor = 1.01074 + # case 2: gfHE = -0.020, factor = 1.02116 + # case 3: gfHE = -0.015, factor = 1.02637 + # case 4: gfHE = 0.015, factor = 1.05763 + # case 5: gfHE = 0.020, factor = 1.06284 + # case 6: gfHE = 0.030, factor = 1.07326 + + def _get_scenarios(self) -> Dict[str, Dict]: + """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. + """ + + return { + + # =========== STATUS QUO ============ + "Baseline": + mix_scenarios( + self._baseline(), + #{ + # "HealthSystem": { + # "yearly_HR_scaling_mode": "no_scaling", + # }, + #} + ), + + } + + def _baseline(self) -> Dict: + """Return the Dict with values for the parameter changes that define the baseline scenario. """ + return mix_scenarios( + get_parameters_for_status_quo(), + { + "HealthSystem": { + "mode_appt_constraints": 1, + "use_funded_or_actual_staffing": "actual", + "cons_availability": "all", + "Service_Availability": [] + } + }, + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py index 2acaad75eb..78d8fe5006 100644 --- a/src/tlo/methods/demography.py +++ b/src/tlo/methods/demography.py @@ -1,3 +1,4 @@ +0 """ The core demography module and its associated events. * Sets initial population size @@ -758,7 +759,10 @@ def apply(self, population): mort_risk = self.mort_risk_per_poll.loc[ self.mort_risk_per_poll.fallbackyear == fallbackyear, [ 'age_years', 'sex', 'prob_of_dying_before_next_poll']].copy() - + + # Artificially increase risk for men under 50 by 50 + mort_risk.loc[(mort_risk["sex"] == "M") & (mort_risk["age_years"] < 50), "prob_of_dying_before_next_poll"] *= 250 + # get the population alive = df.loc[df.is_alive & (df.age_years < MAX_AGE), ['sex', 'age_years']].copy() diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 98c07aa4df..172556016d 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -2016,6 +2016,34 @@ def on_end_of_year(self) -> None: and self.parameters['scale_to_effective_capabilities'] ): self._rescale_capabilities_to_capture_effective_capability() + + # Add emulated appts to real ones in HS summary counters before logging the latter. + # Notes: + # 1) The emulated count is defined in the disease module itself, rather than in the HS, but + # is reset at the HS level after the emulated appts have been added to the total count. + # I *think* this is correct, but we may wish to discuss further. + # 2) Currently this is RTI-specific: in the future, this would have to be generalised to + # any emulated module + + # To total appt count + if 'RTI' in self.sim.modules and self.sim.modules['RTI'].parameters['use_RTI_emulator']: + for key, value in self.sim.modules['RTI'].HS_Use_by_RTI.items(): + # Extract the category part (ignoring the level) + _, category = key.split('_', 1) + self._summary_counter._appts[category] += value + + # To appt countbroken down by level + for key, value in self.sim.modules['RTI'].HS_Use_by_RTI.items(): + # Split key into level and category + level, category = key.split('_', 1) + level_number = level.replace("Level", "") + + if level_number in self._summary_counter._appts_by_level: # Ensure the level exists in the second dictionary + self._summary_counter._appts_by_level[level_number][category] += value + + # Reset emulator counter. + self.sim.modules['RTI'].HS_Use_by_RTI = Counter({col: 0 for col in self.sim.modules['RTI'].HS_Use_Type}) + self._summary_counter.write_to_log_and_reset_counters() self.consumables.on_end_of_year() self.bed_days.on_end_of_year() @@ -2023,6 +2051,7 @@ def on_end_of_year(self) -> None: self._write_hsi_event_counts_to_log_and_reset() self._write_never_ran_hsi_event_counts_to_log_and_reset() + # Record equipment usage for the year, for each facility self._record_general_equipment_usage_for_year() @@ -2739,6 +2768,7 @@ def record_hs_status( def write_to_log_and_reset_counters(self): """Log summary statistics reset the data structures. This usually occurs at the end of the year.""" + logger_summary.info( key="HSI_Event", description="Counts of the HSI_Events that have occurred in this calendar year by TREATMENT_ID, " diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 994b8d1054..745b38d2b7 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -6,11 +6,13 @@ from pathlib import Path from typing import TYPE_CHECKING, List, Optional +from collections import Counter import numpy as np import pandas as pd -from tlo import DateOffset, Module, Parameter, Property, Types, logging +from tlo import DateOffset, Module, Parameter, Property, Types, logging, Date + from tlo.events import Event, IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent from tlo.lm import LinearModel, LinearModelType, Predictor from tlo.methods import Metadata @@ -20,6 +22,11 @@ from tlo.methods.symptommanager import Symptom from tlo.util import read_csv_files +from sdv.single_table import CTGANSynthesizer +from sdv.single_table import GaussianCopulaSynthesizer +from sdv.single_table import TVAESynthesizer +from sdv.sampling import Condition + if TYPE_CHECKING: from tlo.methods.hsi_generic_first_appts import HSIEventScheduler from tlo.population import IndividualProperties @@ -51,6 +58,26 @@ def __init__(self, name=None): 'Lifestyle', 'HealthSystem', } + + # ================================================================================ + # EMULATOR PARAMETERS + # Counters tracking use of HealthSystem by RTI module under use of emulator + HS_Use_Type = [ + 'Level2_AccidentsandEmerg', 'Level2_DiagRadio', 'Level2_EPI', + 'Level2_IPAdmission', 'Level2_InpatientDays', 'Level2_MajorSurg', + 'Level2_MinorSurg', 'Level2_Over5OPD', 'Level2_Tomography', 'Level2_Under5OPD' + ] + + # Initialize the counter with all items set to 0 + HS_Use_by_RTI = Counter({col: 0 for col in HS_Use_Type}) + + Rti_Services = ['Rti_AcutePainManagement','Rti_BurnManagement','Rti_FractureCast','Rti_Imaging','Rti_MajorSurgeries','Rti_MedicalIntervention','Rti_MinorSurgeries','Rti_OpenFractureTreatment','Rti_ShockTreatment','Rti_Suture','Rti_TetanusVaccine'] + + HS_conditions = {} + + RTI_emulator = None + # ================================================================================ + INJURY_INDICES = range(1, 9) @@ -1020,70 +1047,85 @@ def __init__(self, name=None): 'maximum_number_of_times_HSI_events_should_run': Parameter( Types.INT, "limit on the number of times an HSI event can run" + ), + 'use_RTI_emulator': Parameter( + Types.BOOL, + "Replace module with RTI emulator, valid if running in mode 1 with actual consumable availability" ) - } - # Define the module's parameters - PROPERTIES = { - 'rt_road_traffic_inc': Property(Types.BOOL, 'involved in a road traffic injury'), - 'rt_inj_severity': Property(Types.CATEGORICAL, - 'Injury status relating to road traffic injury: none, mild, severe', - categories=['none', 'mild', 'severe'], - ), - **{ - f'rt_injury_{injury_index}': Property( - Types.CATEGORICAL, - f'Codes for injury {injury_index} from RTI', - categories=categories, - ) - # hacky solution to avoid issue that names defined in class scope are not - # accessible in scope of comprehension expresions _except for_ outermost - # iterator - see https://stackoverflow.com/a/13913933/4798943 - for injury_index, categories in zip( - INJURY_INDICES, [INJURY_CODES] * len(INJURY_INDICES) - ) - }, - **{ - f'rt_date_to_remove_daly_{injury_index}': Property( - Types.DATE, - f'Date to remove the daly weight for injury {injury_index}', - ) - for injury_index in INJURY_INDICES - }, - 'rt_in_shock': Property(Types.BOOL, 'A property determining if this person is in shock'), - 'rt_death_from_shock': Property(Types.BOOL, 'whether this person died from shock'), - 'rt_injuries_to_cast': Property(Types.LIST, 'A list of injuries that are to be treated with casts'), - 'rt_injuries_for_minor_surgery': Property(Types.LIST, 'A list of injuries that are to be treated with a minor' - 'surgery'), - 'rt_injuries_for_major_surgery': Property(Types.LIST, 'A list of injuries that are to be treated with a minor' - 'surgery'), - 'rt_injuries_to_heal_with_time': Property(Types.LIST, 'A list of injuries that heal without further treatment'), - 'rt_injuries_for_open_fracture_treatment': Property(Types.LIST, 'A list of injuries that with open fracture ' - 'treatment'), - 'rt_ISS_score': Property(Types.INT, 'The ISS score associated with the injuries resulting from a road traffic' - 'accident'), - 'rt_perm_disability': Property(Types.BOOL, 'whether the injuries from an RTI result in permanent disability'), - 'rt_polytrauma': Property(Types.BOOL, 'polytrauma from RTI'), - 'rt_imm_death': Property(Types.BOOL, 'death at scene True/False'), - 'rt_diagnosed': Property(Types.BOOL, 'Person has had their injuries diagnosed'), - 'rt_post_med_death': Property(Types.BOOL, 'death in following month despite medical intervention True/False'), - 'rt_no_med_death': Property(Types.BOOL, 'death in following month without medical intervention True/False'), - 'rt_unavailable_med_death': Property(Types.BOOL, 'death in the following month without medical intervention ' - 'being able to be provided'), - 'rt_recovery_no_med': Property(Types.BOOL, 'recovery without medical intervention True/False'), - 'rt_disability': Property(Types.REAL, 'disability weight for current month'), - 'rt_date_inj': Property(Types.DATE, 'date of latest injury'), - 'rt_med_int': Property(Types.BOOL, 'whether this person is currently undergoing medical treatment'), - 'rt_in_icu_or_hdu': Property(Types.BOOL, 'whether this person is currently in ICU for RTI'), - 'rt_MAIS_military_score': Property(Types.INT, 'the maximum AIS-military score, used as a proxy to calculate the' - 'probability of mortality without medical intervention'), - 'rt_date_death_no_med': Property(Types.DATE, 'the date which the person has is scheduled to die without medical' - 'intervention'), - 'rt_debugging_DALY_wt': Property(Types.REAL, 'The true value of the DALY weight burden'), - 'rt_injuries_left_untreated': Property(Types.LIST, 'A list of injuries that have been left untreated due to a ' - 'blocked intervention') - } + # Define the module's properties + # ISSUE: When using an emulator, want to limit properties to these four. However parameters have not yet been initialised at this point, so we don't know whether user intends to use emulator or not. + if True: + # This should be: if parameters['use_RTI_emulator']: + # PROPERTIES = { + # 'rt_disability': Property(Types.REAL, 'disability weight for current month'), + # 'rt_disability_permanent': Property(Types.REAL, 'disability weight incurred permanently'), + # 'rt_date_inj': Property(Types.DATE, 'date of latest injury'), + # 'rt_road_traffic_inc': Property(Types.BOOL, 'involved in a road traffic injury'), + # } + #else: + PROPERTIES = { + 'rt_disability': Property(Types.REAL, 'disability weight for current month'), + 'rt_disability_permanent': Property(Types.REAL, 'disability weight incurred permanently'), + 'rt_date_inj': Property(Types.DATE, 'date of latest injury'), + 'rt_road_traffic_inc': Property(Types.BOOL, 'involved in a road traffic injury'), + 'rt_inj_severity': Property(Types.CATEGORICAL, + 'Injury status relating to road traffic injury: none, mild, severe', + categories=['none', 'mild', 'severe'], + ), + **{ + f'rt_injury_{injury_index}': Property( + Types.CATEGORICAL, + f'Codes for injury {injury_index} from RTI', + categories=categories, + ) + # hacky solution to avoid issue that names defined in class scope are not + # accessible in scope of comprehension expresions _except for_ outermost + # iterator - see https://stackoverflow.com/a/13913933/4798943 + for injury_index, categories in zip( + INJURY_INDICES, [INJURY_CODES] * len(INJURY_INDICES) + ) + }, + **{ + f'rt_date_to_remove_daly_{injury_index}': Property( + Types.DATE, + f'Date to remove the daly weight for injury {injury_index}', + ) + for injury_index in INJURY_INDICES + }, + 'rt_in_shock': Property(Types.BOOL, 'A property determining if this person is in shock'), + 'rt_death_from_shock': Property(Types.BOOL, 'whether this person died from shock'), + 'rt_injuries_to_cast': Property(Types.LIST, 'A list of injuries that are to be treated with casts'), + 'rt_injuries_for_minor_surgery': Property(Types.LIST, 'A list of injuries that are to be treated with a minor' + 'surgery'), + 'rt_injuries_for_major_surgery': Property(Types.LIST, 'A list of injuries that are to be treated with a minor' + 'surgery'), + 'rt_injuries_to_heal_with_time': Property(Types.LIST, 'A list of injuries that heal without further treatment'), + 'rt_injuries_for_open_fracture_treatment': Property(Types.LIST, 'A list of injuries that with open fracture ' + 'treatment'), + 'rt_ISS_score': Property(Types.INT, 'The ISS score associated with the injuries resulting from a road traffic' + 'accident'), + 'rt_perm_disability': Property(Types.BOOL, 'whether the injuries from an RTI result in permanent disability'), + 'rt_polytrauma': Property(Types.BOOL, 'polytrauma from RTI'), + 'rt_imm_death': Property(Types.BOOL, 'death at scene True/False'), + 'rt_diagnosed': Property(Types.BOOL, 'Person has had their injuries diagnosed'), + 'rt_post_med_death': Property(Types.BOOL, 'death in following month despite medical intervention True/False'), + 'rt_no_med_death': Property(Types.BOOL, 'death in following month without medical intervention True/False'), + 'rt_unavailable_med_death': Property(Types.BOOL, 'death in the following month without medical intervention ' + 'being able to be provided'), + 'rt_recovery_no_med': Property(Types.BOOL, 'recovery without medical intervention True/False'), + + 'rt_med_int': Property(Types.BOOL, 'whether this person is currently undergoing medical treatment'), + 'rt_in_icu_or_hdu': Property(Types.BOOL, 'whether this person is currently in ICU for RTI'), + 'rt_MAIS_military_score': Property(Types.INT, 'the maximum AIS-military score, used as a proxy to calculate the' + 'probability of mortality without medical intervention'), + 'rt_date_death_no_med': Property(Types.DATE, 'the date which the person has is scheduled to die without medical' + 'intervention'), + 'rt_debugging_DALY_wt': Property(Types.REAL, 'The true value of the DALY weight burden'), + 'rt_injuries_left_untreated': Property(Types.LIST, 'A list of injuries that have been left untreated due to a ' + 'blocked intervention') + } # Declare Metadata METADATA = { @@ -1438,6 +1480,10 @@ def read_parameters(self, resourcefilepath: Optional[Path] = None): non_zero_total_daly_change = sum_check_daly_change_df.where(sum_check_daly_change_df > 0).dropna().index # ensure that these injuries are the permanent injuries assert non_zero_total_daly_change.to_list() == permanent_injuries + + # If using emulator for RTI module, load it here + if self.parameters['use_RTI_emulator']: + self.RTI_emulator = CTGANSynthesizer.load(filepath= resourcefilepath / 'ResourceFile_RTI/RTI_emulator.pkl') def rti_injury_diagnosis(self, person_id, the_appt_footprint): """ @@ -1475,37 +1521,47 @@ def initialise_population(self, population): for the RTI module is that people haven't been involved in a road traffic accident and are therefor alive and healthy.""" df = population.props - df.loc[df.is_alive, 'rt_road_traffic_inc'] = False - df.loc[df.is_alive, 'rt_inj_severity'] = "none" # default: no one has been injured in a RTI - for injury_index in RTI.INJURY_INDICES: - df.loc[df.is_alive, f'rt_injury_{injury_index}'] = "none" - df.loc[df.is_alive, f'rt_date_to_remove_daly_{injury_index}'] = pd.NaT - df.loc[df.is_alive, 'rt_in_shock'] = False - df.loc[df.is_alive, 'rt_death_from_shock'] = False - df.loc[df.is_alive, 'rt_polytrauma'] = False - df.loc[df.is_alive, 'rt_ISS_score'] = 0 - df.loc[df.is_alive, 'rt_perm_disability'] = False - df.loc[df.is_alive, 'rt_imm_death'] = False # default: no one is dead on scene of crash - df.loc[df.is_alive, 'rt_diagnosed'] = False - df.loc[df.is_alive, 'rt_recovery_no_med'] = False # default: no recovery without medical intervention - df.loc[df.is_alive, 'rt_post_med_death'] = False # default: no death after medical intervention - df.loc[df.is_alive, 'rt_no_med_death'] = False - df.loc[df.is_alive, 'rt_unavailable_med_death'] = False - df.loc[df.is_alive, 'rt_disability'] = 0 # default: no DALY - df.loc[df.is_alive, 'rt_date_inj'] = pd.NaT - df.loc[df.is_alive, 'rt_med_int'] = False - df.loc[df.is_alive, 'rt_in_icu_or_hdu'] = False - df.loc[df.is_alive, 'rt_MAIS_military_score'] = 0 - df.loc[df.is_alive, 'rt_date_death_no_med'] = pd.NaT - df.loc[df.is_alive, 'rt_debugging_DALY_wt'] = 0 - alive_count = sum(df.is_alive) - df.loc[df.is_alive, 'rt_injuries_to_cast'] = pd.Series([[] for _ in range(alive_count)]) - df.loc[df.is_alive, 'rt_injuries_for_minor_surgery'] = pd.Series([[] for _ in range(alive_count)]) - df.loc[df.is_alive, 'rt_injuries_for_major_surgery'] = pd.Series([[] for _ in range(alive_count)]) - df.loc[df.is_alive, 'rt_injuries_to_heal_with_time'] = pd.Series([[] for _ in range(alive_count)]) - df.loc[df.is_alive, 'rt_injuries_for_open_fracture_treatment'] = pd.Series([[] for _ in range(alive_count)]) - df.loc[df.is_alive, 'rt_injuries_left_untreated'] = pd.Series([[] for _ in range(alive_count)]) - + + if True: + #if self.parameters['use_RTI_emulator']: + # df.loc[df.is_alive, 'rt_disability'] = 0 # default: no DALY + # df.loc[df.is_alive, 'rt_disability_permanent'] = 0 # default: no DALY + # df.loc[df.is_alive, 'rt_road_traffic_inc'] = False + # df.loc[df.is_alive, 'rt_date_inj'] = pd.NaT + + # else: + df.loc[df.is_alive, 'rt_disability'] = 0 # default: no DALY + df.loc[df.is_alive, 'rt_disability_permanent'] = 0 # default: no DALY + df.loc[df.is_alive, 'rt_road_traffic_inc'] = False + df.loc[df.is_alive, 'rt_inj_severity'] = "none" # default: no one has been injured in a RTI + df.loc[df.is_alive, 'rt_date_inj'] = pd.NaT + for injury_index in RTI.INJURY_INDICES: + df.loc[df.is_alive, f'rt_injury_{injury_index}'] = "none" + df.loc[df.is_alive, f'rt_date_to_remove_daly_{injury_index}'] = pd.NaT + df.loc[df.is_alive, 'rt_in_shock'] = False + df.loc[df.is_alive, 'rt_death_from_shock'] = False + df.loc[df.is_alive, 'rt_polytrauma'] = False + df.loc[df.is_alive, 'rt_ISS_score'] = 0 + df.loc[df.is_alive, 'rt_perm_disability'] = False + df.loc[df.is_alive, 'rt_imm_death'] = False # default: no one is dead on scene of crash + df.loc[df.is_alive, 'rt_diagnosed'] = False + df.loc[df.is_alive, 'rt_recovery_no_med'] = False # default: no recovery without medical intervention + df.loc[df.is_alive, 'rt_post_med_death'] = False # default: no death after medical intervention + df.loc[df.is_alive, 'rt_no_med_death'] = False + df.loc[df.is_alive, 'rt_unavailable_med_death'] = False + df.loc[df.is_alive, 'rt_med_int'] = False + df.loc[df.is_alive, 'rt_in_icu_or_hdu'] = False + df.loc[df.is_alive, 'rt_MAIS_military_score'] = 0 + df.loc[df.is_alive, 'rt_date_death_no_med'] = pd.NaT + df.loc[df.is_alive, 'rt_debugging_DALY_wt'] = 0 + alive_count = sum(df.is_alive) + df.loc[df.is_alive, 'rt_injuries_to_cast'] = pd.Series([[] for _ in range(alive_count)]) + df.loc[df.is_alive, 'rt_injuries_for_minor_surgery'] = pd.Series([[] for _ in range(alive_count)]) + df.loc[df.is_alive, 'rt_injuries_for_major_surgery'] = pd.Series([[] for _ in range(alive_count)]) + df.loc[df.is_alive, 'rt_injuries_to_heal_with_time'] = pd.Series([[] for _ in range(alive_count)]) + df.loc[df.is_alive, 'rt_injuries_for_open_fracture_treatment'] = pd.Series([[] for _ in range(alive_count)]) + df.loc[df.is_alive, 'rt_injuries_left_untreated'] = pd.Series([[] for _ in range(alive_count)]) + def initialise_simulation(self, sim): """At the start of the simulation we schedule a logging event, which records the relevant information regarding road traffic injuries in the last month. @@ -1523,12 +1579,28 @@ def initialise_simulation(self, sim): """ # Begin modelling road traffic injuries sim.schedule_event(RTIPollingEvent(self), sim.date + DateOffset(months=0)) - # Begin checking whether the persons injuries are healed - sim.schedule_event(RTI_Recovery_Event(self), sim.date + DateOffset(months=0)) - # Begin checking whether those with untreated injuries die - sim.schedule_event(RTI_Check_Death_No_Med(self), sim.date + DateOffset(months=0)) - # Begin logging the RTI events - sim.schedule_event(RTI_Logging_Event(self), sim.date + DateOffset(months=1)) + + if self.parameters['use_RTI_emulator'] is False: + # Begin checking whether the persons injuries are healed + sim.schedule_event(RTI_Recovery_Event(self), sim.date + DateOffset(months=0)) + # Begin checking whether those with untreated injuries die + sim.schedule_event(RTI_Check_Death_No_Med(self), sim.date + DateOffset(months=0)) + # Begin logging the RTI events + sim.schedule_event(RTI_Logging_Event(self), sim.date + DateOffset(months=1)) + + else: + print("======") + # If all services are included, set everything to True + if sim.modules['HealthSystem'].service_availability == ['*']: + for i in sim.modules['RTI'].Rti_Services: + sim.modules['RTI'].HS_conditions[i] = True + else: + for i in sim.modules['RTI'].Rti_Services: + if (i + '_*') in sim.modules['HealthSystem'].service_availability: + sim.modules['RTI'].HS_conditions[i] = True + else: + sim.modules['RTI'].HS_conditions[i] = False + # Look-up consumable item codes self.look_up_consumable_item_codes() @@ -2036,6 +2108,7 @@ def rti_assign_daly_weights(self, injured_index): df.loc[DALYweightunderlimit, 'rt_disability'] = 0 assert (df.loc[injured_index, 'rt_disability'] > 0).all() + def rti_alter_daly_post_treatment(self, person_id, codes): """ This function removes the DALY weight associated with each injury code after treatment is complete. This @@ -2267,35 +2340,43 @@ def on_birth(self, mother_id, child_id): :return: n/a """ df = self.sim.population.props - df.at[child_id, 'rt_road_traffic_inc'] = False - df.at[child_id, 'rt_inj_severity'] = "none" # default: no one has been injured in a RTI - for injury_index in RTI.INJURY_INDICES: - df.at[child_id, f'rt_injury_{injury_index}'] = "none" - df.at[child_id, f'rt_date_to_remove_daly_{injury_index}'] = pd.NaT - df.at[child_id, 'rt_in_shock'] = False - df.at[child_id, 'rt_death_from_shock'] = False - df.at[child_id, 'rt_injuries_to_cast'] = [] - df.at[child_id, 'rt_injuries_for_minor_surgery'] = [] - df.at[child_id, 'rt_injuries_for_major_surgery'] = [] - df.at[child_id, 'rt_injuries_to_heal_with_time'] = [] - df.at[child_id, 'rt_injuries_for_open_fracture_treatment'] = [] - df.at[child_id, 'rt_polytrauma'] = False - df.at[child_id, 'rt_ISS_score'] = 0 - df.at[child_id, 'rt_imm_death'] = False - df.at[child_id, 'rt_perm_disability'] = False - df.at[child_id, 'rt_med_int'] = False # default: no one has a had medical intervention - df.at[child_id, 'rt_in_icu_or_hdu'] = False - df.at[child_id, 'rt_diagnosed'] = False - df.at[child_id, 'rt_recovery_no_med'] = False # default: no recovery without medical intervention - df.at[child_id, 'rt_post_med_death'] = False # default: no death after medical intervention - df.at[child_id, 'rt_no_med_death'] = False - df.at[child_id, 'rt_unavailable_med_death'] = False - df.at[child_id, 'rt_disability'] = 0 # default: no disability due to RTI - df.at[child_id, 'rt_date_inj'] = pd.NaT - df.at[child_id, 'rt_MAIS_military_score'] = 0 - df.at[child_id, 'rt_date_death_no_med'] = pd.NaT - df.at[child_id, 'rt_debugging_DALY_wt'] = 0 - df.at[child_id, 'rt_injuries_left_untreated'] = [] + + if True: + #if self.parameters['use_RTI_emulator']: + # df.at[child_id, 'rt_disability'] = 0 # default: no DALY + # df.at[child_id, 'rt_disability_permanent'] = 0 # default: no DALY + # df.at[child_id, 'rt_road_traffic_inc'] = False + # df.at[child_id, 'rt_date_inj'] = pd.NaT + #else: + df.at[child_id, 'rt_road_traffic_inc'] = False + df.at[child_id, 'rt_inj_severity'] = "none" # default: no one has been injured in a RTI + for injury_index in RTI.INJURY_INDICES: + df.at[child_id, f'rt_injury_{injury_index}'] = "none" + df.at[child_id, f'rt_date_to_remove_daly_{injury_index}'] = pd.NaT + df.at[child_id, 'rt_in_shock'] = False + df.at[child_id, 'rt_death_from_shock'] = False + df.at[child_id, 'rt_injuries_to_cast'] = [] + df.at[child_id, 'rt_injuries_for_minor_surgery'] = [] + df.at[child_id, 'rt_injuries_for_major_surgery'] = [] + df.at[child_id, 'rt_injuries_to_heal_with_time'] = [] + df.at[child_id, 'rt_injuries_for_open_fracture_treatment'] = [] + df.at[child_id, 'rt_polytrauma'] = False + df.at[child_id, 'rt_ISS_score'] = 0 + df.at[child_id, 'rt_imm_death'] = False + df.at[child_id, 'rt_perm_disability'] = False + df.at[child_id, 'rt_med_int'] = False # default: no one has a had medical intervention + df.at[child_id, 'rt_in_icu_or_hdu'] = False + df.at[child_id, 'rt_diagnosed'] = False + df.at[child_id, 'rt_recovery_no_med'] = False # default: no recovery without medical intervention + df.at[child_id, 'rt_post_med_death'] = False # default: no death after medical intervention + df.at[child_id, 'rt_no_med_death'] = False + df.at[child_id, 'rt_unavailable_med_death'] = False + df.at[child_id, 'rt_disability'] = 0 # default: no disability due to RTI + df.at[child_id, 'rt_date_inj'] = pd.NaT + df.at[child_id, 'rt_MAIS_military_score'] = 0 + df.at[child_id, 'rt_date_death_no_med'] = pd.NaT + df.at[child_id, 'rt_debugging_DALY_wt'] = 0 + df.at[child_id, 'rt_injuries_left_untreated'] = [] def look_up_consumable_item_codes(self): """Look up the item codes that used in the HSI in the module""" @@ -2737,7 +2818,6 @@ def do_at_generic_first_appt_emergency( # that represent disease events for particular persons. # --------------------------------------------------------------------------------------------------------- - class RTIPollingEvent(RegularEvent, PopulationScopeEventMixin): """The regular RTI event which handles all the initial RTI related changes to the dataframe. It can be thought of as the actual road traffic accident occurring. Specifically the event decides who is involved in a road traffic @@ -2797,6 +2877,30 @@ def __init__(self, module): self.prob_bleeding_leads_to_shock = p['prob_bleeding_leads_to_shock'] self.rt_emergency_care_ISS_score_cut_off = p['rt_emergency_care_ISS_score_cut_off'] + + def sample_NN_model(self, N): + data = { + "is_alive_after_RTI": np.random.choice([True, False], size=N, p=[0.98, 0.02]), + "duration_days": np.random.randint(0, 366, size=N), + "rt_disability_average": np.random.uniform(0, 1, size=N), + "rt_disability_permanent": np.random.uniform(0, 1, size=N), + + "Level2_AccidentsandEmerg": np.random.uniform(0, 3, size=N), + "Level2_DiagRadio": np.random.uniform(0, 3, size=N), + "Level2_EPI": np.random.uniform(0, 3, size=N), + "Level2_IPAdmission": np.random.uniform(0, 3, size=N), + "Level2_MajorSurg": np.random.uniform(0, 3, size=N), + "Level2_MinorSurg": np.random.uniform(0, 3, size=N), + "Level2_Over5OPD": np.random.uniform(0, 3, size=N), + "Level2_Tomography": np.random.uniform(0, 3, size=N), + "Level2_Under5OPD": np.random.uniform(0, 3, size=N), + + # Adding the 'Level2_InpatientDays' column with values between 0 and 200 + "Level2_InpatientDays": np.random.randint(0, 201, size=N) + } + + return pd.DataFrame(data) + def apply(self, population): """Apply this event to the population. @@ -2804,47 +2908,51 @@ def apply(self, population): """ df = population.props now = self.sim.date - # Reset injury properties after death, get an index of people who have died due to RTI, all causes - diedfromrtiidx = df.index[df.rt_imm_death | df.rt_post_med_death | df.rt_no_med_death | df.rt_death_from_shock | - df.rt_unavailable_med_death] - df.loc[diedfromrtiidx, "rt_imm_death"] = False - df.loc[diedfromrtiidx, "rt_post_med_death"] = False - df.loc[diedfromrtiidx, "rt_no_med_death"] = False - df.loc[diedfromrtiidx, "rt_unavailable_med_death"] = False - df.loc[diedfromrtiidx, "rt_disability"] = 0 - df.loc[diedfromrtiidx, "rt_med_int"] = False - df.loc[diedfromrtiidx, 'rt_in_icu_or_hdu'] = False - for index, row in df.loc[diedfromrtiidx].iterrows(): - df.at[index, 'rt_injuries_to_cast'] = [] - df.at[index, 'rt_injuries_for_minor_surgery'] = [] - df.at[index, 'rt_injuries_for_major_surgery'] = [] - df.at[index, 'rt_injuries_to_heal_with_time'] = [] - df.at[index, 'rt_injuries_for_open_fracture_treatment'] = [] - df.at[index, 'rt_injuries_left_untreated'] = [] - df.loc[diedfromrtiidx, "rt_diagnosed"] = False - df.loc[diedfromrtiidx, "rt_polytrauma"] = False - df.loc[diedfromrtiidx, "rt_inj_severity"] = "none" - df.loc[diedfromrtiidx, "rt_perm_disability"] = False - for injury_column in RTI.INJURY_COLUMNS: - df.loc[diedfromrtiidx, injury_column] = "none" - for date_to_remove_daly_column in RTI.DATE_TO_REMOVE_DALY_COLUMNS: - df.loc[diedfromrtiidx, date_to_remove_daly_column] = pd.NaT - df.loc[diedfromrtiidx, 'rt_date_death_no_med'] = pd.NaT - df.loc[diedfromrtiidx, 'rt_MAIS_military_score'] = 0 - df.loc[diedfromrtiidx, 'rt_debugging_DALY_wt'] = 0 - df.loc[diedfromrtiidx, 'rt_in_shock'] = False - # reset whether they have been selected for an injury this month - df['rt_road_traffic_inc'] = False - - # --------------------------------- UPDATING OF RTI OVER TIME ------------------------------------------------- - # Currently we have the following conditions for being able to be involved in a road traffic injury, they are - # alive, they aren't currently injured, they didn't die immediately in - # a road traffic injury in the last month and finally, they aren't currently being treated for a road traffic - # injury. - rt_current_non_ind = df.index[df.is_alive & ~df.rt_road_traffic_inc & ~df.rt_imm_death & ~df.rt_med_int & - (df.rt_inj_severity == "none")] - - # ========= Update for people currently not involved in a RTI, make some involved in a RTI event ============== + + if self.sim.modules['RTI'].parameters['use_RTI_emulator']: + rt_current_non_ind = df.index[df.is_alive & ~df.rt_road_traffic_inc] + else: + # Reset injury properties after death, get an index of people who have died due to RTI, all causes + diedfromrtiidx = df.index[df.rt_imm_death | df.rt_post_med_death | df.rt_no_med_death | df.rt_death_from_shock | + df.rt_unavailable_med_death] + df.loc[diedfromrtiidx, "rt_imm_death"] = False + df.loc[diedfromrtiidx, "rt_post_med_death"] = False + df.loc[diedfromrtiidx, "rt_no_med_death"] = False + df.loc[diedfromrtiidx, "rt_unavailable_med_death"] = False + df.loc[diedfromrtiidx, "rt_disability"] = 0 + df.loc[diedfromrtiidx, "rt_med_int"] = False + df.loc[diedfromrtiidx, 'rt_in_icu_or_hdu'] = False + for index, row in df.loc[diedfromrtiidx].iterrows(): + df.at[index, 'rt_injuries_to_cast'] = [] + df.at[index, 'rt_injuries_for_minor_surgery'] = [] + df.at[index, 'rt_injuries_for_major_surgery'] = [] + df.at[index, 'rt_injuries_to_heal_with_time'] = [] + df.at[index, 'rt_injuries_for_open_fracture_treatment'] = [] + df.at[index, 'rt_injuries_left_untreated'] = [] + df.loc[diedfromrtiidx, "rt_diagnosed"] = False + df.loc[diedfromrtiidx, "rt_polytrauma"] = False + df.loc[diedfromrtiidx, "rt_inj_severity"] = "none" + df.loc[diedfromrtiidx, "rt_perm_disability"] = False + for injury_column in RTI.INJURY_COLUMNS: + df.loc[diedfromrtiidx, injury_column] = "none" + for date_to_remove_daly_column in RTI.DATE_TO_REMOVE_DALY_COLUMNS: + df.loc[diedfromrtiidx, date_to_remove_daly_column] = pd.NaT + df.loc[diedfromrtiidx, 'rt_date_death_no_med'] = pd.NaT + df.loc[diedfromrtiidx, 'rt_MAIS_military_score'] = 0 + df.loc[diedfromrtiidx, 'rt_debugging_DALY_wt'] = 0 + df.loc[diedfromrtiidx, 'rt_in_shock'] = False + # reset whether they have been selected for an injury this month + df['rt_road_traffic_inc'] = False + + # --------------------------------- UPDATING OF RTI OVER TIME ------------------------------------------------- + # Currently we have the following conditions for being able to be involved in a road traffic injury, they are + # alive, they aren't currently injured, they didn't die immediately in + # a road traffic injury in the last month and finally, they aren't currently being treated for a road traffic + # injury. + rt_current_non_ind = df.index[df.is_alive & ~df.rt_road_traffic_inc & ~df.rt_imm_death & ~df.rt_med_int & + (df.rt_inj_severity == "none")] + + # ========= Update for people currently not involved in a RTI, make some involved in a RTI event ============== # Use linear model helper class eq = LinearModel(LinearModelType.MULTIPLICATIVE, self.base_1m_prob_rti, @@ -2867,143 +2975,203 @@ def apply(self, population): pred = eq.predict(df.loc[rt_current_non_ind]) random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) selected_for_rti = rt_current_non_ind[pred > random_draw_in_rti] + # Update to say they have been involved in a rti df.loc[selected_for_rti, 'rt_road_traffic_inc'] = True # Set the date that people were injured to now df.loc[selected_for_rti, 'rt_date_inj'] = now - # ========================= Take those involved in a RTI and assign some to death ============================== - # This section accounts for pre-hospital mortality, where a person is so severy injured that they die before - # being able to seek medical care - selected_to_die = selected_for_rti[self.imm_death_proportion_rti > - self.module.rng.random_sample(size=len(selected_for_rti))] - # Keep track of who experience pre-hospital mortality with the property rt_imm_death - df.loc[selected_to_die, 'rt_imm_death'] = True - # For each person selected to experience pre-hospital mortality, schedule an InstantaneosDeath event - for individual_id in selected_to_die: - self.sim.modules['Demography'].do_death(individual_id=individual_id, cause="RTI_imm_death", - originating_module=self.module) - # ============= Take those remaining people involved in a RTI and assign injuries to them ================== - # Drop those who have died immediately - selected_for_rti_inj_idx = selected_for_rti.drop(selected_to_die) - # Check that none remain - assert len(selected_for_rti_inj_idx.intersection(selected_to_die)) == 0 - # take a copy dataframe, used to get the index of the population affected by RTI - selected_for_rti_inj = df.loc[selected_for_rti_inj_idx] - # Again make sure that those who have injuries assigned to them are alive, involved in a crash and didn't die on - # scene - selected_for_rti_inj = selected_for_rti_inj.loc[df.is_alive & df.rt_road_traffic_inc & ~df.rt_imm_death] - # To stop people who have died from causes outside of the RTI module progressing through the model, remove - # any person with the condition 'cause_of_death' is not null - died_elsewhere_index = selected_for_rti_inj[~ selected_for_rti_inj['cause_of_death'].isnull()].index - # drop the died_elsewhere_index from selected_for_rti_inj - selected_for_rti_inj.drop(died_elsewhere_index, inplace=True) - # Create shorthand link to RTI module - road_traffic_injuries = self.sim.modules['RTI'] - - # if people have been chosen to be injured, assign the injuries using the assign injuries function - description = road_traffic_injuries.rti_assign_injuries(len(selected_for_rti_inj)) - # replace the nan values with 'none', this is so that the injuries can be copied over from this temporarily used - # pandas dataframe will fit in with the categories in the columns rt_injury_1 through rt_injury_8 - description = description.replace('nan', 'none') - # set the index of the description dataframe, so that we can join it to the selected_for_rti_inj dataframe - description = description.set_index(selected_for_rti_inj.index) - # copy over values from the assign injury dataframe to self.sim.population.props - - df.loc[selected_for_rti_inj.index, 'rt_ISS_score'] = \ - description.loc[selected_for_rti_inj.index, 'ISS'].astype(int) - df.loc[selected_for_rti_inj.index, 'rt_MAIS_military_score'] = \ - description.loc[selected_for_rti_inj.index, 'MAIS'].astype(int) - # ======================== Apply the injuries to the population dataframe ====================================== - # Find the corresponding column names - injury_columns = pd.Index(RTI.INJURY_COLUMNS) - matching_columns = description.columns.intersection(injury_columns) - for col in matching_columns: - df.loc[selected_for_rti_inj.index, col] = description.loc[selected_for_rti_inj.index, col] - # Run assert statements to make sure the model is behaving as it should - # All those who are injured in a road traffic accident have this noted in the property 'rt_road_traffic_inc' - assert df.loc[selected_for_rti, 'rt_road_traffic_inc'].all() - # All those who are involved in a road traffic accident have these noted in the property 'rt_date_inj' - assert (df.loc[selected_for_rti, 'rt_date_inj'] != pd.NaT).all() - # All those who are injures and do not die immediately have an ISS score > 0 - assert len(df.loc[df.rt_road_traffic_inc & ~df.rt_imm_death, 'rt_ISS_score'] > 0) == \ - len(df.loc[df.rt_road_traffic_inc & ~df.rt_imm_death]) - # ========================== Determine who will experience shock from blood loss ============================== - internal_bleeding_codes = ['361', '363', '461', '463', '813bo', '813co', '813do', '813eo'] - df = self.sim.population.props + + if self.sim.modules['RTI'].parameters['use_RTI_emulator']: + + if len(selected_for_rti)>0: + # This is where we want to replace normal course of events for RTI with emulator. + + # First, sample outcomes for individuals which were selected_for_rti. + # For now, don't consider properties of individual when sampling outcome. All we care about is the number of samples. + condition_for_Rti = Condition( + num_rows=len(selected_for_rti), + column_values=self.sim.modules['RTI'].HS_conditions + ) + NN_model = self.sim.modules['RTI'].RTI_emulator.sample_from_conditions( + conditions=[condition_for_Rti], + ) - potential_shock_index, _ = \ - road_traffic_injuries.rti_find_and_count_injuries(df.loc[df.rt_road_traffic_inc, RTI.INJURY_COLUMNS], - internal_bleeding_codes) - rand_for_shock = self.module.rng.random_sample(len(potential_shock_index)) - shock_index = potential_shock_index[self.prob_bleeding_leads_to_shock > rand_for_shock] - df.loc[shock_index, 'rt_in_shock'] = True - # log the percentage of those with RTIs in shock - percent_in_shock = \ - len(shock_index) / len(selected_for_rti_inj) if len(selected_for_rti_inj) > 0 else float("nan") - logger.info(key='Percent_of_shock_in_rti', - data={'Percent_of_shock_in_rti': percent_in_shock}, - description='The percentage of those assigned injuries who were also assign the shock property') - # ========================== Decide survival time without medical intervention ================================ - # todo: find better time for survival data without med int for ISS scores - # Assign a date in the future for which when the simulation reaches that date, the person's mortality will be - # checked if they haven't sought care - df.loc[selected_for_rti_inj.index, 'rt_date_death_no_med'] = now + DateOffset(days=7) - # ============================ Injury severity classification ================================================= - # Find those with mild injuries and update the rt_inj_severity property so they have a mild injury - injured_this_month = df.loc[selected_for_rti_inj.index] - mild_rti_idx = injured_this_month.index[injured_this_month.is_alive & injured_this_month['rt_ISS_score'] < 15] - df.loc[mild_rti_idx, 'rt_inj_severity'] = 'mild' - # Find those with severe injuries and update the rt_inj_severity property so they have a severe injury - severe_rti_idx = injured_this_month.index[injured_this_month['rt_ISS_score'] >= 15] - df.loc[severe_rti_idx, 'rt_inj_severity'] = 'severe' - # check that everyone who has been assigned an injury this month has an associated injury severity - assert sum(df.loc[df.rt_road_traffic_inc & ~df.rt_imm_death & (df.rt_date_inj == now), 'rt_inj_severity'] - != 'none') == len(selected_for_rti_inj.index) - # Find those with polytrauma and update the rt_polytrauma property so they have polytrauma - polytrauma_idx = description.loc[description.Polytrauma].index - df.loc[polytrauma_idx, 'rt_polytrauma'] = True - # Assign daly weights for each person's injuries with the function rti_assign_daly_weights - road_traffic_injuries.rti_assign_daly_weights(selected_for_rti_inj.index) - - # =============================== Health seeking behaviour set up ======================================= - # Set up health seeking behaviour. Two symptoms are used in the RTI module, the generic injury symptom and an - # emergency symptom 'severe_trauma'. - - # The condition to be sent to the health care system: 1) They must be alive 2) They must have been involved in a - # road traffic accident 3) they must have not died immediately in the accident 4) they must not have been to an - # A and E department previously and been diagnosed - - # The symptom they are assigned depends injury severity, those with mild injuries will be assigned the generic - # symptom, those with severe injuries will have the emergency injury symptom - - # Create the logical conditions for each symptom - condition_to_be_sent_to_em = \ - df.is_alive & df.rt_road_traffic_inc & ~df.rt_diagnosed & ~df.rt_imm_death & (df.rt_date_inj == now) & \ - (df.rt_injury_1 != "none") & (df.rt_ISS_score >= self.rt_emergency_care_ISS_score_cut_off) - condition_to_be_sent_to_begin_non_emergency = \ - df.is_alive & df.rt_road_traffic_inc & ~df.rt_diagnosed & ~df.rt_imm_death & (df.rt_date_inj == now) & \ - (df.rt_injury_1 != "none") & (df.rt_ISS_score < self.rt_emergency_care_ISS_score_cut_off) - # check that all those who meet the conditions to try and seek healthcare have at least one injury - assert sum(df.loc[condition_to_be_sent_to_em, 'rt_injury_1'] != "none") == \ - len(df.loc[condition_to_be_sent_to_em]) - assert sum(df.loc[condition_to_be_sent_to_begin_non_emergency, 'rt_injury_1'] != "none") == \ - len(df.loc[condition_to_be_sent_to_begin_non_emergency]) - # create indexes of people to be assigned each rti symptom - em_idx = df.index[condition_to_be_sent_to_em] - non_em_idx = df.index[condition_to_be_sent_to_begin_non_emergency] - # Assign the symptoms - self.sim.modules['SymptomManager'].change_symptom( - person_id=em_idx.tolist(), - disease_module=self.module, - add_or_remove='+', - symptom_string='severe_trauma', - ) - self.sim.modules['SymptomManager'].change_symptom( - person_id=non_em_idx.tolist(), - disease_module=self.module, - add_or_remove='+', - symptom_string='injury', - ) + # HS USAGE + # Get the total number of different types of appts that will be accessed as a result of this polling event and add to rolling count. + for column in self.sim.modules['RTI'].HS_Use_Type: + self.sim.modules['RTI'].HS_Use_by_RTI[column] += NN_model[column].sum() # Sum all values in the column + + # Change current properties of the individual and schedule resolution of event. + count = 0 + for person_id in selected_for_rti: + + # These properties are determined by the NN sampling + is_alive_after_RTI = NN_model.loc[count,'is_alive_after_RTI'] + + # Why does this require an int wrapper to work with DateOffset? + duration_days = int(NN_model.loc[count,'duration_days']) + + # Individual experiences an immediate death + if is_alive_after_RTI is False and duration_days == 0: + + # For each person selected to experience pre-hospital mortality, schedule an InstantaneosDeath event + self.sim.modules['Demography'].do_death(individual_id=individual_id, cause="RTI_imm_death", + originating_module=self.module) + + # Else individual doesn't immediately die, therefore schedule resolution + else: + # Set disability to what will be the average over duration of the episodef + df.loc[person_id,'rt_disability'] = NN_model.loc[count,'rt_disability_average'] + + # Make sure this person is not 'discoverable' by polling event next month. + #df.loc[person_id,'rt_inj_severity'] = 'mild' # instead of "none", but actually we don't know how severe it is + + # Schedule resolution + if is_alive_after_RTI: + # Store permanent disability incurred now to be accessed when Recovery Event is invoked. + df.loc[person_id,'rt_disability_permanent'] = NN_model.loc[count,'rt_disability_permanent'] + self.sim.schedule_event(RTI_NNResolution_Recovery_Event(self.module, person_id), df.loc[person_id, 'rt_date_inj'] + DateOffset(days=duration_days)) + else: + self.sim.schedule_event(RTI_NNResolution_Death_Event(self.module, person_id), df.loc[person_id, 'rt_date_inj'] + DateOffset(days=duration_days)) + + count += 1 + + else: + + # ========================= Take those involved in a RTI and assign some to death ============================== + # This section accounts for pre-hospital mortality, where a person is so severy injured that they die before + # being able to seek medical care + selected_to_die = selected_for_rti[self.imm_death_proportion_rti > + self.module.rng.random_sample(size=len(selected_for_rti))] + # Keep track of who experience pre-hospital mortality with the property rt_imm_death + df.loc[selected_to_die, 'rt_imm_death'] = True + # For each person selected to experience pre-hospital mortality, schedule an InstantaneosDeath event + for individual_id in selected_to_die: + self.sim.modules['Demography'].do_death(individual_id=individual_id, cause="RTI_imm_death", + originating_module=self.module) + + # ============= Take those remaining people involved in a RTI and assign injuries to them ================== + # Drop those who have died immediately + selected_for_rti_inj_idx = selected_for_rti.drop(selected_to_die) + # Check that none remain + assert len(selected_for_rti_inj_idx.intersection(selected_to_die)) == 0 + # take a copy dataframe, used to get the index of the population affected by RTI + selected_for_rti_inj = df.loc[selected_for_rti_inj_idx] + # Again make sure that those who have injuries assigned to them are alive, involved in a crash and didn't die on + # scene + selected_for_rti_inj = selected_for_rti_inj.loc[df.is_alive & df.rt_road_traffic_inc & ~df.rt_imm_death] + # To stop people who have died from causes outside of the RTI module progressing through the model, remove + # any person with the condition 'cause_of_death' is not null + died_elsewhere_index = selected_for_rti_inj[~ selected_for_rti_inj['cause_of_death'].isnull()].index + # drop the died_elsewhere_index from selected_for_rti_inj + selected_for_rti_inj.drop(died_elsewhere_index, inplace=True) + # Create shorthand link to RTI module + road_traffic_injuries = self.sim.modules['RTI'] + + # if people have been chosen to be injured, assign the injuries using the assign injuries function + description = road_traffic_injuries.rti_assign_injuries(len(selected_for_rti_inj)) + # replace the nan values with 'none', this is so that the injuries can be copied over from this temporarily used + # pandas dataframe will fit in with the categories in the columns rt_injury_1 through rt_injury_8 + description = description.replace('nan', 'none') + # set the index of the description dataframe, so that we can join it to the selected_for_rti_inj dataframe + description = description.set_index(selected_for_rti_inj.index) + # copy over values from the assign injury dataframe to self.sim.population.props + + df.loc[selected_for_rti_inj.index, 'rt_ISS_score'] = \ + description.loc[selected_for_rti_inj.index, 'ISS'].astype(int) + df.loc[selected_for_rti_inj.index, 'rt_MAIS_military_score'] = \ + description.loc[selected_for_rti_inj.index, 'MAIS'].astype(int) + # ======================== Apply the injuries to the population dataframe ====================================== + # Find the corresponding column names + injury_columns = pd.Index(RTI.INJURY_COLUMNS) + matching_columns = description.columns.intersection(injury_columns) + for col in matching_columns: + df.loc[selected_for_rti_inj.index, col] = description.loc[selected_for_rti_inj.index, col] + # Run assert statements to make sure the model is behaving as it should + # All those who are injured in a road traffic accident have this noted in the property 'rt_road_traffic_inc' + assert df.loc[selected_for_rti, 'rt_road_traffic_inc'].all() + # All those who are involved in a road traffic accident have these noted in the property 'rt_date_inj' + assert (df.loc[selected_for_rti, 'rt_date_inj'] != pd.NaT).all() + # All those who are injures and do not die immediately have an ISS score > 0 + assert len(df.loc[df.rt_road_traffic_inc & ~df.rt_imm_death, 'rt_ISS_score'] > 0) == \ + len(df.loc[df.rt_road_traffic_inc & ~df.rt_imm_death]) + # ========================== Determine who will experience shock from blood loss ============================== + internal_bleeding_codes = ['361', '363', '461', '463', '813bo', '813co', '813do', '813eo'] + df = self.sim.population.props + + potential_shock_index, _ = \ + road_traffic_injuries.rti_find_and_count_injuries(df.loc[df.rt_road_traffic_inc, RTI.INJURY_COLUMNS], + internal_bleeding_codes) + rand_for_shock = self.module.rng.random_sample(len(potential_shock_index)) + shock_index = potential_shock_index[self.prob_bleeding_leads_to_shock > rand_for_shock] + df.loc[shock_index, 'rt_in_shock'] = True + # log the percentage of those with RTIs in shock + percent_in_shock = \ + len(shock_index) / len(selected_for_rti_inj) if len(selected_for_rti_inj) > 0 else float("nan") + logger.info(key='Percent_of_shock_in_rti', + data={'Percent_of_shock_in_rti': percent_in_shock}, + description='The percentage of those assigned injuries who were also assign the shock property') + # ========================== Decide survival time without medical intervention ================================ + # todo: find better time for survival data without med int for ISS scores + # Assign a date in the future for which when the simulation reaches that date, the person's mortality will be + # checked if they haven't sought care + df.loc[selected_for_rti_inj.index, 'rt_date_death_no_med'] = now + DateOffset(days=7) + # ============================ Injury severity classification ================================================= + # Find those with mild injuries and update the rt_inj_severity property so they have a mild injury + injured_this_month = df.loc[selected_for_rti_inj.index] + mild_rti_idx = injured_this_month.index[injured_this_month.is_alive & injured_this_month['rt_ISS_score'] < 15] + df.loc[mild_rti_idx, 'rt_inj_severity'] = 'mild' + # Find those with severe injuries and update the rt_inj_severity property so they have a severe injury + severe_rti_idx = injured_this_month.index[injured_this_month['rt_ISS_score'] >= 15] + df.loc[severe_rti_idx, 'rt_inj_severity'] = 'severe' + # check that everyone who has been assigned an injury this month has an associated injury severity + assert sum(df.loc[df.rt_road_traffic_inc & ~df.rt_imm_death & (df.rt_date_inj == now), 'rt_inj_severity'] + != 'none') == len(selected_for_rti_inj.index) + # Find those with polytrauma and update the rt_polytrauma property so they have polytrauma + polytrauma_idx = description.loc[description.Polytrauma].index + df.loc[polytrauma_idx, 'rt_polytrauma'] = True + # Assign daly weights for each person's injuries with the function rti_assign_daly_weights + road_traffic_injuries.rti_assign_daly_weights(selected_for_rti_inj.index) + + # =============================== Health seeking behaviour set up ======================================= + # Set up health seeking behaviour. Two symptoms are used in the RTI module, the generic injury symptom and an + # emergency symptom 'severe_trauma'. + + # The condition to be sent to the health care system: 1) They must be alive 2) They must have been involved in a + # road traffic accident 3) they must have not died immediately in the accident 4) they must not have been to an + # A and E department previously and been diagnosed + + # The symptom they are assigned depends injury severity, those with mild injuries will be assigned the generic + # symptom, those with severe injuries will have the emergency injury symptom + + # Create the logical conditions for each symptom + condition_to_be_sent_to_em = \ + df.is_alive & df.rt_road_traffic_inc & ~df.rt_diagnosed & ~df.rt_imm_death & (df.rt_date_inj == now) & \ + (df.rt_injury_1 != "none") & (df.rt_ISS_score >= self.rt_emergency_care_ISS_score_cut_off) + condition_to_be_sent_to_begin_non_emergency = \ + df.is_alive & df.rt_road_traffic_inc & ~df.rt_diagnosed & ~df.rt_imm_death & (df.rt_date_inj == now) & \ + (df.rt_injury_1 != "none") & (df.rt_ISS_score < self.rt_emergency_care_ISS_score_cut_off) + # check that all those who meet the conditions to try and seek healthcare have at least one injury + assert sum(df.loc[condition_to_be_sent_to_em, 'rt_injury_1'] != "none") == \ + len(df.loc[condition_to_be_sent_to_em]) + assert sum(df.loc[condition_to_be_sent_to_begin_non_emergency, 'rt_injury_1'] != "none") == \ + len(df.loc[condition_to_be_sent_to_begin_non_emergency]) + # create indexes of people to be assigned each rti symptom + em_idx = df.index[condition_to_be_sent_to_em] + non_em_idx = df.index[condition_to_be_sent_to_begin_non_emergency] + # Assign the symptoms + self.sim.modules['SymptomManager'].change_symptom( + person_id=em_idx.tolist(), + disease_module=self.module, + add_or_remove='+', + symptom_string='severe_trauma', + ) + self.sim.modules['SymptomManager'].change_symptom( + person_id=non_em_idx.tolist(), + disease_module=self.module, + add_or_remove='+', + symptom_string='injury', + ) class RTI_Check_Death_No_Med(RegularEvent, PopulationScopeEventMixin): @@ -5383,6 +5551,28 @@ def apply(self, person_id): assert mortality_checked, 'Something missing in criteria' +class RTI_NNResolution_Death_Event(Event, IndividualScopeEventMixin): + """This is an individual-level event that determines the end of the incindent for individual via death""" + def __init__(self, module, person_id): + super().__init__(module, person_id=person_id) + def apply(self, person_id): + # For now invoking RTI_death_with_med, although technically we don't know whether individual accessed HSI or not. + # How finely do we want to really resolve RTI deaths? + self.sim.modules['Demography'].do_death(individual_id=person_id, cause="RTI_death_with_med", + originating_module=self.module) + +class RTI_NNResolution_Recovery_Event(Event, IndividualScopeEventMixin): + """This is an individual-level event that determines the end of the incindent for individual via death""" + def __init__(self, module, person_id): + super().__init__(module, person_id=person_id) + def apply(self, person_id): + df = self.sim.population.props + # Updat rt disability with long term outcome of accident + df.loc[person_id,'rt_disability'] = df.loc[person_id,'rt_disability_permanent'] + # Ensure that this person will be 'eligible' for rti injury again + df.loc[person_id,'rt_road_traffic_inc'] = False + + class RTI_No_Lifesaving_Medical_Intervention_Death_Event(Event, IndividualScopeEventMixin): """This is the NoMedicalInterventionDeathEvent. It is scheduled by the MedicalInterventionEvent which determines the resources required to treat that person and if they aren't present, the person is sent here. This function is also diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index d2560f92d9..b63a3700ce 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -358,7 +358,6 @@ def _update_progress_bar(self, progress_bar: ProgressBar, date: Date) -> None: def run_simulation_to(self, *, to_date: Date) -> None: """Run simulation up to a specified date. - Unlike :py:meth:`simulate` this method does not initialise or finalise simulation and the date simulated to can be any date before or equal to simulation end date. @@ -366,6 +365,7 @@ def run_simulation_to(self, *, to_date: Date) -> None: :param to_date: Date to simulate up to but not including - must be before or equal to simulation end date specified in call to :py:meth:`initialise`. """ + if not self._initialised: msg = "Simulation must be initialised before calling run_simulation_to" raise SimulationNotInitialisedError(msg)