From 9a54cd70cbbd3e9a36df7901cbb7d8993dcc24a4 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 9 Dec 2024 14:51:20 +0000 Subject: [PATCH 01/25] Start modifying RTI module to include NN option --- src/tlo/methods/rti.py | 74 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 8 deletions(-) diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 68ef59fcf0..01aed5e7d3 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2867,21 +2867,79 @@ def apply(self, population): pred = eq.predict(df.loc[rt_current_non_ind]) random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) selected_for_rti = rt_current_non_ind[pred > random_draw_in_rti] + # Update to say they have been involved in a rti df.loc[selected_for_rti, 'rt_road_traffic_inc'] = True # Set the date that people were injured to now df.loc[selected_for_rti, 'rt_date_inj'] = now + + # This is where we want to replace normal course of events with emulator + # We have to: + # 1. Sample new properties for individual + + # For now, don't consider properties of individual when sampling outcome. All we care about is the number of samples. + # NN_model = sample_NN_model(len(selected_for_rti)) + + # 2. Change current properties of the individual + count = 0 + + for person_id in selected_for_rti: + + if NN_model.loc[count,'is_alive_after_RTI'] is False and NN_model.loc[count,'duration_days'] == 0: + + # Keep track of who experience pre-hospital mortality with the property rt_imm_death + df.loc[person_id, 'rt_imm_death'] = True + # For each person selected to experience pre-hospital mortality, schedule an InstantaneosDeath event + self.sim.modules['Demography'].do_death(individual_id=individual_id, cause="RTI_imm_death", + originating_module=self.module) + else: + # Set disability to what will be the average over duration of the episode + df.loc[person_id,'rt_dalys'] = NN_model.loc[count,'RT_disability_average'] + + # Schedule resolution + self.sim.schedule_event(RTI_Emulated_Resolution(self.module, person_id, is_alive_after_RT, rt_disability_permament), self.sim.date + + DateOffset(days=duration_days)) + + # Add HS use to running count + #self.running_counter_of_HSIs_used += HSI_counters + + count += 1 + + # 3. Schedule resolution of event + log of HS usage by individual + + # N_samples = len(selected_for_rti) + # For all of these, select + + # if is_alive_after_RTI == False and duration_days == 0: + # Keep track of who experience pre-hospital mortality with the property rt_imm_death + df.loc[selected_to_die, 'rt_imm_death'] = True + # For each person selected to experience pre-hospital mortality, schedule an InstantaneosDeath event + for individual_id in selected_to_die: + self.sim.modules['Demography'].do_death(individual_id=individual_id, cause="RTI_imm_death", + originating_module=self.module) + + # For all remaining individuals, + + # UPDATED # ========================= Take those involved in a RTI and assign some to death ============================== + + # REMOVE BECAUSE OUTDATED # This section accounts for pre-hospital mortality, where a person is so severy injured that they die before # being able to seek medical care - selected_to_die = selected_for_rti[self.imm_death_proportion_rti > - self.module.rng.random_sample(size=len(selected_for_rti))] - # Keep track of who experience pre-hospital mortality with the property rt_imm_death - df.loc[selected_to_die, 'rt_imm_death'] = True - # For each person selected to experience pre-hospital mortality, schedule an InstantaneosDeath event - for individual_id in selected_to_die: - self.sim.modules['Demography'].do_death(individual_id=individual_id, cause="RTI_imm_death", - originating_module=self.module) + #selected_to_die = selected_for_rti[self.imm_death_proportion_rti > + # self.module.rng.random_sample(size=len(selected_for_rti))] + + + + + # All of the following can be removed under the assumption that this data is capturing the incidence as well. + # Replace with + # Update of current relevant variables + + # Those that are sampled are + + + # ============= Take those remaining people involved in a RTI and assign injuries to them ================== # Drop those who have died immediately selected_for_rti_inj_idx = selected_for_rti.drop(selected_to_die) From 0abcdb56e2b88ed273e7e8ca6a5854c4483be465 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 11 Dec 2024 10:14:06 +0000 Subject: [PATCH 02/25] Add emulated HSI count to normal HS logger --- src/tlo/methods/healthsystem.py | 27 ++ src/tlo/methods/rti.py | 419 ++++++++++++++++++-------------- 2 files changed, 265 insertions(+), 181 deletions(-) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 5c6b2022e1..4341eff6fd 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -1982,6 +1982,31 @@ def on_end_of_year(self) -> None: and self.parameters['scale_to_effective_capabilities'] ): self._rescale_capabilities_to_capture_effective_capability() + + # Add emulated appts to real ones in HS summary counters before logging the latter. + # Notes: + # The emulated count is defined in the disease module itself, rather than in the HS, but + # is reset at the HS level after the emulated appts have been added to the total count. + # I *think* this is correct, but we may wish to discuss further. + + # To total appt count + for key, value in self.sim.modules['RTI'].HS_Use_by_RTI.items(): + # Extract the category part (ignoring the level) + _, category = key.split('_', 1) + self._summary_counter._appts[category] += value + + # To appt countbroken down by level + for key, value in self.sim.modules['RTI'].HS_Use_by_RTI.items(): + # Split key into level and category + level, category = key.split('_', 1) + level_number = level.replace("Level", "") + + if level_number in self._summary_counter._appts_by_level: # Ensure the level exists in the second dictionary + self._summary_counter._appts_by_level[level_number][category] += value + + # Reset emulator counter. + self.sim.modules['RTI'].HS_Use_by_RTI = Counter({col: 0 for col in module.HS_Use_Type}) + self._summary_counter.write_to_log_and_reset_counters() self.consumables.on_end_of_year() self.bed_days.on_end_of_year() @@ -1989,6 +2014,7 @@ def on_end_of_year(self) -> None: self._write_hsi_event_counts_to_log_and_reset() self._write_never_ran_hsi_event_counts_to_log_and_reset() + def run_population_level_events(self, _list_of_population_hsi_event_tuples: List[HSIEventQueueItem]) -> None: """Run a list of population level events.""" while len(_list_of_population_hsi_event_tuples) > 0: @@ -2758,6 +2784,7 @@ def record_hs_status( def write_to_log_and_reset_counters(self): """Log summary statistics reset the data structures. This usually occurs at the end of the year.""" + logger_summary.info( key="HSI_Event", description="Counts of the HSI_Events that have occurred in this calendar year by TREATMENT_ID, " diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 01aed5e7d3..a43dfa2f26 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -6,11 +6,13 @@ from pathlib import Path from typing import TYPE_CHECKING, List +from collections import Counter import numpy as np import pandas as pd -from tlo import DateOffset, Module, Parameter, Property, Types, logging +from tlo import DateOffset, Module, Parameter, Property, Types, logging, Date + from tlo.events import Event, IndividualScopeEventMixin, PopulationScopeEventMixin, RegularEvent from tlo.lm import LinearModel, LinearModelType, Predictor from tlo.methods import Metadata @@ -51,6 +53,16 @@ def __init__(self, name=None, resourcefilepath=None): 'Lifestyle', 'HealthSystem', } + + # Counters tracking use of HealthSystem by RTI module under use of emulator + HS_Use_Type = [ + 'Level2_AccidentsandEmerg', 'Level2_DiagRadio', 'Level2_EPI', + 'Level2_IPAdmission', 'Level2_InpatientDays', 'Level2_MajorSurg', + 'Level2_MinorSurg', 'Level2_Over5OPD', 'Level2_Tomography', 'Level2_Under5OPD' + ] + + # Initialize the counter with all items set to 0 + HS_Use_by_RTI = Counter({col: 0 for col in HS_Use_Type}) INJURY_INDICES = range(1, 9) @@ -1073,6 +1085,7 @@ def __init__(self, name=None, resourcefilepath=None): 'being able to be provided'), 'rt_recovery_no_med': Property(Types.BOOL, 'recovery without medical intervention True/False'), 'rt_disability': Property(Types.REAL, 'disability weight for current month'), + 'rt_disability_permanent': Property(Types.REAL, 'disability weight incurred permanently'), 'rt_date_inj': Property(Types.DATE, 'date of latest injury'), 'rt_med_int': Property(Types.BOOL, 'whether this person is currently undergoing medical treatment'), 'rt_in_icu_or_hdu': Property(Types.BOOL, 'whether this person is currently in ICU for RTI'), @@ -1492,6 +1505,7 @@ def initialise_population(self, population): df.loc[df.is_alive, 'rt_no_med_death'] = False df.loc[df.is_alive, 'rt_unavailable_med_death'] = False df.loc[df.is_alive, 'rt_disability'] = 0 # default: no DALY + df.loc[df.is_alive, 'rt_disability_permanent'] = 0 # default: no DALY df.loc[df.is_alive, 'rt_date_inj'] = pd.NaT df.loc[df.is_alive, 'rt_med_int'] = False df.loc[df.is_alive, 'rt_in_icu_or_hdu'] = False @@ -2036,6 +2050,7 @@ def rti_assign_daly_weights(self, injured_index): df.loc[DALYweightunderlimit, 'rt_disability'] = 0 assert (df.loc[injured_index, 'rt_disability'] > 0).all() + def rti_alter_daly_post_treatment(self, person_id, codes): """ This function removes the DALY weight associated with each injury code after treatment is complete. This @@ -2737,7 +2752,6 @@ def do_at_generic_first_appt_emergency( # that represent disease events for particular persons. # --------------------------------------------------------------------------------------------------------- - class RTIPollingEvent(RegularEvent, PopulationScopeEventMixin): """The regular RTI event which handles all the initial RTI related changes to the dataframe. It can be thought of as the actual road traffic accident occurring. Specifically the event decides who is involved in a road traffic @@ -2797,6 +2811,30 @@ def __init__(self, module): self.prob_bleeding_leads_to_shock = p['prob_bleeding_leads_to_shock'] self.rt_emergency_care_ISS_score_cut_off = p['rt_emergency_care_ISS_score_cut_off'] + + def sample_NN_model(self, N): + data = { + "is_alive_after_RTI": np.random.choice([True, False], size=N), + "duration_days": np.random.randint(0, 366, size=N), + "rt_disability_average": np.random.uniform(0, 1, size=N), + "rt_disability_permanent": np.random.uniform(0, 1, size=N), + + "Level2_AccidentsandEmerg": np.random.uniform(0, 3, size=N), + "Level2_DiagRadio": np.random.uniform(0, 3, size=N), + "Level2_EPI": np.random.uniform(0, 3, size=N), + "Level2_IPAdmission": np.random.uniform(0, 3, size=N), + "Level2_MajorSurg": np.random.uniform(0, 3, size=N), + "Level2_MinorSurg": np.random.uniform(0, 3, size=N), + "Level2_Over5OPD": np.random.uniform(0, 3, size=N), + "Level2_Tomography": np.random.uniform(0, 3, size=N), + "Level2_Under5OPD": np.random.uniform(0, 3, size=N), + + # Adding the 'Level2_InpatientDays' column with values between 0 and 200 + "Level2_InpatientDays": np.random.randint(0, 201, size=N) + } + + return pd.DataFrame(data) + def apply(self, population): """Apply this event to the population. @@ -2872,45 +2910,64 @@ def apply(self, population): df.loc[selected_for_rti, 'rt_road_traffic_inc'] = True # Set the date that people were injured to now df.loc[selected_for_rti, 'rt_date_inj'] = now - - # This is where we want to replace normal course of events with emulator - # We have to: - # 1. Sample new properties for individual - - # For now, don't consider properties of individual when sampling outcome. All we care about is the number of samples. - # NN_model = sample_NN_model(len(selected_for_rti)) - - # 2. Change current properties of the individual - count = 0 - - for person_id in selected_for_rti: - if NN_model.loc[count,'is_alive_after_RTI'] is False and NN_model.loc[count,'duration_days'] == 0: + # Decide whether to use emulator or not + use_emulator = True + + if use_emulator: + # This is where we want to replace normal course of events for RTI with emulator. + + # First, sample outcomes for individuals which were selected_for_rti. + # For now, don't consider properties of individual when sampling outcome. All we care about is the number of samples. + NN_model = self.sample_NN_model(len(selected_for_rti)) + + # HS USAGE + # Get the total number of different types of appts accessed as a result of this polling event and add to rolling count. + for column in self.sim.modules['RTI'].HS_Use_Type: + self.sim.modules['RTI'].HS_Use_by_RTI[column] += NN_model[column].sum() # Sum all values in the column + + # Change current properties of the individual and schedule resolution of event. + count = 0 + for person_id in selected_for_rti: - # Keep track of who experience pre-hospital mortality with the property rt_imm_death - df.loc[person_id, 'rt_imm_death'] = True - # For each person selected to experience pre-hospital mortality, schedule an InstantaneosDeath event - self.sim.modules['Demography'].do_death(individual_id=individual_id, cause="RTI_imm_death", - originating_module=self.module) - else: - # Set disability to what will be the average over duration of the episode - df.loc[person_id,'rt_dalys'] = NN_model.loc[count,'RT_disability_average'] + is_alive_after_RTI = NN_model.loc[count,'is_alive_after_RTI'] + # Why does this require an int wrapper to work with DateOffset? + duration_days = int(NN_model.loc[count,'duration_days']) - # Schedule resolution - self.sim.schedule_event(RTI_Emulated_Resolution(self.module, person_id, is_alive_after_RT, rt_disability_permament), self.sim.date + - DateOffset(days=duration_days)) - - # Add HS use to running count - #self.running_counter_of_HSIs_used += HSI_counters - - count += 1 - - # 3. Schedule resolution of event + log of HS usage by individual - - # N_samples = len(selected_for_rti) - # For all of these, select + # Individual experiences an immediate death + if is_alive_after_RTI is False and duration_days == 0: + # Keep track of who experience pre-hospital mortality with the property rt_imm_death + # Do we need to keep this? Could probably ignore if using emulator. + df.loc[person_id, 'rt_imm_death'] = True + # For each person selected to experience pre-hospital mortality, schedule an InstantaneosDeath event + self.sim.modules['Demography'].do_death(individual_id=individual_id, cause="RTI_imm_death", + originating_module=self.module) + + # Else individual doesn't immediately die, therefore schedule resolution + else: + # Set disability to what will be the average over duration of the episode + df.loc[person_id,'rt_disability'] = NN_model.loc[count,'rt_disability_average'] + + # Make sure this person is not 'discoverable' by polling event next month. + df.loc[person_id,'rt_inj_severity'] = 'mild' # instead of "none", but actually we don't know how severe it is + + # Schedule resolution + if is_alive_after_RTI: + # Store permanent disability incurred now to be accessed when Recovery Event is invoked. + df.loc[person_id,'rt_disability_permanent'] = NN_model.loc[count,'rt_disability_permanent'] + self.sim.schedule_event(RTI_NNResolution_Recovery_Event(self.module, person_id), self.sim.date + DateOffset(days=duration_days)) + else: + self.sim.schedule_event(RTI_NNResolution_Death_Event(self.module, person_id), self.sim.date + DateOffset(days=duration_days)) + + count += 1 - # if is_alive_after_RTI == False and duration_days == 0: + else: + + # ========================= Take those involved in a RTI and assign some to death ============================== + # This section accounts for pre-hospital mortality, where a person is so severy injured that they die before + # being able to seek medical care + selected_to_die = selected_for_rti[self.imm_death_proportion_rti > + self.module.rng.random_sample(size=len(selected_for_rti))] # Keep track of who experience pre-hospital mortality with the property rt_imm_death df.loc[selected_to_die, 'rt_imm_death'] = True # For each person selected to experience pre-hospital mortality, schedule an InstantaneosDeath event @@ -2918,150 +2975,128 @@ def apply(self, population): self.sim.modules['Demography'].do_death(individual_id=individual_id, cause="RTI_imm_death", originating_module=self.module) - # For all remaining individuals, - - # UPDATED - # ========================= Take those involved in a RTI and assign some to death ============================== - - # REMOVE BECAUSE OUTDATED - # This section accounts for pre-hospital mortality, where a person is so severy injured that they die before - # being able to seek medical care - #selected_to_die = selected_for_rti[self.imm_death_proportion_rti > - # self.module.rng.random_sample(size=len(selected_for_rti))] - - - - - # All of the following can be removed under the assumption that this data is capturing the incidence as well. - # Replace with - # Update of current relevant variables - - # Those that are sampled are - - - - # ============= Take those remaining people involved in a RTI and assign injuries to them ================== - # Drop those who have died immediately - selected_for_rti_inj_idx = selected_for_rti.drop(selected_to_die) - # Check that none remain - assert len(selected_for_rti_inj_idx.intersection(selected_to_die)) == 0 - # take a copy dataframe, used to get the index of the population affected by RTI - selected_for_rti_inj = df.loc[selected_for_rti_inj_idx] - # Again make sure that those who have injuries assigned to them are alive, involved in a crash and didn't die on - # scene - selected_for_rti_inj = selected_for_rti_inj.loc[df.is_alive & df.rt_road_traffic_inc & ~df.rt_imm_death] - # To stop people who have died from causes outside of the RTI module progressing through the model, remove - # any person with the condition 'cause_of_death' is not null - died_elsewhere_index = selected_for_rti_inj[~ selected_for_rti_inj['cause_of_death'].isnull()].index - # drop the died_elsewhere_index from selected_for_rti_inj - selected_for_rti_inj.drop(died_elsewhere_index, inplace=True) - # Create shorthand link to RTI module - road_traffic_injuries = self.sim.modules['RTI'] - - # if people have been chosen to be injured, assign the injuries using the assign injuries function - description = road_traffic_injuries.rti_assign_injuries(len(selected_for_rti_inj)) - # replace the nan values with 'none', this is so that the injuries can be copied over from this temporarily used - # pandas dataframe will fit in with the categories in the columns rt_injury_1 through rt_injury_8 - description = description.replace('nan', 'none') - # set the index of the description dataframe, so that we can join it to the selected_for_rti_inj dataframe - description = description.set_index(selected_for_rti_inj.index) - # copy over values from the assign injury dataframe to self.sim.population.props - - df.loc[selected_for_rti_inj.index, 'rt_ISS_score'] = \ - description.loc[selected_for_rti_inj.index, 'ISS'].astype(int) - df.loc[selected_for_rti_inj.index, 'rt_MAIS_military_score'] = \ - description.loc[selected_for_rti_inj.index, 'MAIS'].astype(int) - # ======================== Apply the injuries to the population dataframe ====================================== - # Find the corresponding column names - injury_columns = pd.Index(RTI.INJURY_COLUMNS) - matching_columns = description.columns.intersection(injury_columns) - for col in matching_columns: - df.loc[selected_for_rti_inj.index, col] = description.loc[selected_for_rti_inj.index, col] - # Run assert statements to make sure the model is behaving as it should - # All those who are injured in a road traffic accident have this noted in the property 'rt_road_traffic_inc' - assert df.loc[selected_for_rti, 'rt_road_traffic_inc'].all() - # All those who are involved in a road traffic accident have these noted in the property 'rt_date_inj' - assert (df.loc[selected_for_rti, 'rt_date_inj'] != pd.NaT).all() - # All those who are injures and do not die immediately have an ISS score > 0 - assert len(df.loc[df.rt_road_traffic_inc & ~df.rt_imm_death, 'rt_ISS_score'] > 0) == \ - len(df.loc[df.rt_road_traffic_inc & ~df.rt_imm_death]) - # ========================== Determine who will experience shock from blood loss ============================== - internal_bleeding_codes = ['361', '363', '461', '463', '813bo', '813co', '813do', '813eo'] - df = self.sim.population.props - - potential_shock_index, _ = \ - road_traffic_injuries.rti_find_and_count_injuries(df.loc[df.rt_road_traffic_inc, RTI.INJURY_COLUMNS], - internal_bleeding_codes) - rand_for_shock = self.module.rng.random_sample(len(potential_shock_index)) - shock_index = potential_shock_index[self.prob_bleeding_leads_to_shock > rand_for_shock] - df.loc[shock_index, 'rt_in_shock'] = True - # log the percentage of those with RTIs in shock - percent_in_shock = \ - len(shock_index) / len(selected_for_rti_inj) if len(selected_for_rti_inj) > 0 else float("nan") - logger.info(key='Percent_of_shock_in_rti', - data={'Percent_of_shock_in_rti': percent_in_shock}, - description='The percentage of those assigned injuries who were also assign the shock property') - # ========================== Decide survival time without medical intervention ================================ - # todo: find better time for survival data without med int for ISS scores - # Assign a date in the future for which when the simulation reaches that date, the person's mortality will be - # checked if they haven't sought care - df.loc[selected_for_rti_inj.index, 'rt_date_death_no_med'] = now + DateOffset(days=7) - # ============================ Injury severity classification ================================================= - # Find those with mild injuries and update the rt_inj_severity property so they have a mild injury - injured_this_month = df.loc[selected_for_rti_inj.index] - mild_rti_idx = injured_this_month.index[injured_this_month.is_alive & injured_this_month['rt_ISS_score'] < 15] - df.loc[mild_rti_idx, 'rt_inj_severity'] = 'mild' - # Find those with severe injuries and update the rt_inj_severity property so they have a severe injury - severe_rti_idx = injured_this_month.index[injured_this_month['rt_ISS_score'] >= 15] - df.loc[severe_rti_idx, 'rt_inj_severity'] = 'severe' - # check that everyone who has been assigned an injury this month has an associated injury severity - assert sum(df.loc[df.rt_road_traffic_inc & ~df.rt_imm_death & (df.rt_date_inj == now), 'rt_inj_severity'] - != 'none') == len(selected_for_rti_inj.index) - # Find those with polytrauma and update the rt_polytrauma property so they have polytrauma - polytrauma_idx = description.loc[description.Polytrauma].index - df.loc[polytrauma_idx, 'rt_polytrauma'] = True - # Assign daly weights for each person's injuries with the function rti_assign_daly_weights - road_traffic_injuries.rti_assign_daly_weights(selected_for_rti_inj.index) - - # =============================== Health seeking behaviour set up ======================================= - # Set up health seeking behaviour. Two symptoms are used in the RTI module, the generic injury symptom and an - # emergency symptom 'severe_trauma'. - - # The condition to be sent to the health care system: 1) They must be alive 2) They must have been involved in a - # road traffic accident 3) they must have not died immediately in the accident 4) they must not have been to an - # A and E department previously and been diagnosed - - # The symptom they are assigned depends injury severity, those with mild injuries will be assigned the generic - # symptom, those with severe injuries will have the emergency injury symptom - - # Create the logical conditions for each symptom - condition_to_be_sent_to_em = \ - df.is_alive & df.rt_road_traffic_inc & ~df.rt_diagnosed & ~df.rt_imm_death & (df.rt_date_inj == now) & \ - (df.rt_injury_1 != "none") & (df.rt_ISS_score >= self.rt_emergency_care_ISS_score_cut_off) - condition_to_be_sent_to_begin_non_emergency = \ - df.is_alive & df.rt_road_traffic_inc & ~df.rt_diagnosed & ~df.rt_imm_death & (df.rt_date_inj == now) & \ - (df.rt_injury_1 != "none") & (df.rt_ISS_score < self.rt_emergency_care_ISS_score_cut_off) - # check that all those who meet the conditions to try and seek healthcare have at least one injury - assert sum(df.loc[condition_to_be_sent_to_em, 'rt_injury_1'] != "none") == \ - len(df.loc[condition_to_be_sent_to_em]) - assert sum(df.loc[condition_to_be_sent_to_begin_non_emergency, 'rt_injury_1'] != "none") == \ - len(df.loc[condition_to_be_sent_to_begin_non_emergency]) - # create indexes of people to be assigned each rti symptom - em_idx = df.index[condition_to_be_sent_to_em] - non_em_idx = df.index[condition_to_be_sent_to_begin_non_emergency] - # Assign the symptoms - self.sim.modules['SymptomManager'].change_symptom( - person_id=em_idx.tolist(), - disease_module=self.module, - add_or_remove='+', - symptom_string='severe_trauma', - ) - self.sim.modules['SymptomManager'].change_symptom( - person_id=non_em_idx.tolist(), - disease_module=self.module, - add_or_remove='+', - symptom_string='injury', - ) + # ============= Take those remaining people involved in a RTI and assign injuries to them ================== + # Drop those who have died immediately + selected_for_rti_inj_idx = selected_for_rti.drop(selected_to_die) + # Check that none remain + assert len(selected_for_rti_inj_idx.intersection(selected_to_die)) == 0 + # take a copy dataframe, used to get the index of the population affected by RTI + selected_for_rti_inj = df.loc[selected_for_rti_inj_idx] + # Again make sure that those who have injuries assigned to them are alive, involved in a crash and didn't die on + # scene + selected_for_rti_inj = selected_for_rti_inj.loc[df.is_alive & df.rt_road_traffic_inc & ~df.rt_imm_death] + # To stop people who have died from causes outside of the RTI module progressing through the model, remove + # any person with the condition 'cause_of_death' is not null + died_elsewhere_index = selected_for_rti_inj[~ selected_for_rti_inj['cause_of_death'].isnull()].index + # drop the died_elsewhere_index from selected_for_rti_inj + selected_for_rti_inj.drop(died_elsewhere_index, inplace=True) + # Create shorthand link to RTI module + road_traffic_injuries = self.sim.modules['RTI'] + + # if people have been chosen to be injured, assign the injuries using the assign injuries function + description = road_traffic_injuries.rti_assign_injuries(len(selected_for_rti_inj)) + # replace the nan values with 'none', this is so that the injuries can be copied over from this temporarily used + # pandas dataframe will fit in with the categories in the columns rt_injury_1 through rt_injury_8 + description = description.replace('nan', 'none') + # set the index of the description dataframe, so that we can join it to the selected_for_rti_inj dataframe + description = description.set_index(selected_for_rti_inj.index) + # copy over values from the assign injury dataframe to self.sim.population.props + + df.loc[selected_for_rti_inj.index, 'rt_ISS_score'] = \ + description.loc[selected_for_rti_inj.index, 'ISS'].astype(int) + df.loc[selected_for_rti_inj.index, 'rt_MAIS_military_score'] = \ + description.loc[selected_for_rti_inj.index, 'MAIS'].astype(int) + # ======================== Apply the injuries to the population dataframe ====================================== + # Find the corresponding column names + injury_columns = pd.Index(RTI.INJURY_COLUMNS) + matching_columns = description.columns.intersection(injury_columns) + for col in matching_columns: + df.loc[selected_for_rti_inj.index, col] = description.loc[selected_for_rti_inj.index, col] + # Run assert statements to make sure the model is behaving as it should + # All those who are injured in a road traffic accident have this noted in the property 'rt_road_traffic_inc' + assert df.loc[selected_for_rti, 'rt_road_traffic_inc'].all() + # All those who are involved in a road traffic accident have these noted in the property 'rt_date_inj' + assert (df.loc[selected_for_rti, 'rt_date_inj'] != pd.NaT).all() + # All those who are injures and do not die immediately have an ISS score > 0 + assert len(df.loc[df.rt_road_traffic_inc & ~df.rt_imm_death, 'rt_ISS_score'] > 0) == \ + len(df.loc[df.rt_road_traffic_inc & ~df.rt_imm_death]) + # ========================== Determine who will experience shock from blood loss ============================== + internal_bleeding_codes = ['361', '363', '461', '463', '813bo', '813co', '813do', '813eo'] + df = self.sim.population.props + + potential_shock_index, _ = \ + road_traffic_injuries.rti_find_and_count_injuries(df.loc[df.rt_road_traffic_inc, RTI.INJURY_COLUMNS], + internal_bleeding_codes) + rand_for_shock = self.module.rng.random_sample(len(potential_shock_index)) + shock_index = potential_shock_index[self.prob_bleeding_leads_to_shock > rand_for_shock] + df.loc[shock_index, 'rt_in_shock'] = True + # log the percentage of those with RTIs in shock + percent_in_shock = \ + len(shock_index) / len(selected_for_rti_inj) if len(selected_for_rti_inj) > 0 else float("nan") + logger.info(key='Percent_of_shock_in_rti', + data={'Percent_of_shock_in_rti': percent_in_shock}, + description='The percentage of those assigned injuries who were also assign the shock property') + # ========================== Decide survival time without medical intervention ================================ + # todo: find better time for survival data without med int for ISS scores + # Assign a date in the future for which when the simulation reaches that date, the person's mortality will be + # checked if they haven't sought care + df.loc[selected_for_rti_inj.index, 'rt_date_death_no_med'] = now + DateOffset(days=7) + # ============================ Injury severity classification ================================================= + # Find those with mild injuries and update the rt_inj_severity property so they have a mild injury + injured_this_month = df.loc[selected_for_rti_inj.index] + mild_rti_idx = injured_this_month.index[injured_this_month.is_alive & injured_this_month['rt_ISS_score'] < 15] + df.loc[mild_rti_idx, 'rt_inj_severity'] = 'mild' + # Find those with severe injuries and update the rt_inj_severity property so they have a severe injury + severe_rti_idx = injured_this_month.index[injured_this_month['rt_ISS_score'] >= 15] + df.loc[severe_rti_idx, 'rt_inj_severity'] = 'severe' + # check that everyone who has been assigned an injury this month has an associated injury severity + assert sum(df.loc[df.rt_road_traffic_inc & ~df.rt_imm_death & (df.rt_date_inj == now), 'rt_inj_severity'] + != 'none') == len(selected_for_rti_inj.index) + # Find those with polytrauma and update the rt_polytrauma property so they have polytrauma + polytrauma_idx = description.loc[description.Polytrauma].index + df.loc[polytrauma_idx, 'rt_polytrauma'] = True + # Assign daly weights for each person's injuries with the function rti_assign_daly_weights + road_traffic_injuries.rti_assign_daly_weights(selected_for_rti_inj.index) + + # =============================== Health seeking behaviour set up ======================================= + # Set up health seeking behaviour. Two symptoms are used in the RTI module, the generic injury symptom and an + # emergency symptom 'severe_trauma'. + + # The condition to be sent to the health care system: 1) They must be alive 2) They must have been involved in a + # road traffic accident 3) they must have not died immediately in the accident 4) they must not have been to an + # A and E department previously and been diagnosed + + # The symptom they are assigned depends injury severity, those with mild injuries will be assigned the generic + # symptom, those with severe injuries will have the emergency injury symptom + + # Create the logical conditions for each symptom + condition_to_be_sent_to_em = \ + df.is_alive & df.rt_road_traffic_inc & ~df.rt_diagnosed & ~df.rt_imm_death & (df.rt_date_inj == now) & \ + (df.rt_injury_1 != "none") & (df.rt_ISS_score >= self.rt_emergency_care_ISS_score_cut_off) + condition_to_be_sent_to_begin_non_emergency = \ + df.is_alive & df.rt_road_traffic_inc & ~df.rt_diagnosed & ~df.rt_imm_death & (df.rt_date_inj == now) & \ + (df.rt_injury_1 != "none") & (df.rt_ISS_score < self.rt_emergency_care_ISS_score_cut_off) + # check that all those who meet the conditions to try and seek healthcare have at least one injury + assert sum(df.loc[condition_to_be_sent_to_em, 'rt_injury_1'] != "none") == \ + len(df.loc[condition_to_be_sent_to_em]) + assert sum(df.loc[condition_to_be_sent_to_begin_non_emergency, 'rt_injury_1'] != "none") == \ + len(df.loc[condition_to_be_sent_to_begin_non_emergency]) + # create indexes of people to be assigned each rti symptom + em_idx = df.index[condition_to_be_sent_to_em] + non_em_idx = df.index[condition_to_be_sent_to_begin_non_emergency] + # Assign the symptoms + self.sim.modules['SymptomManager'].change_symptom( + person_id=em_idx.tolist(), + disease_module=self.module, + add_or_remove='+', + symptom_string='severe_trauma', + ) + self.sim.modules['SymptomManager'].change_symptom( + person_id=non_em_idx.tolist(), + disease_module=self.module, + add_or_remove='+', + symptom_string='injury', + ) class RTI_Check_Death_No_Med(RegularEvent, PopulationScopeEventMixin): @@ -5415,6 +5450,28 @@ def apply(self, person_id): assert mortality_checked, 'Something missing in criteria' +class RTI_NNResolution_Death_Event(Event, IndividualScopeEventMixin): + """This is an individual-level event that determines the end of the incindent for individual via death""" + def __init__(self, module, person_id): + super().__init__(module, person_id=person_id) + def apply(self, person_id): + # For now invoking RTI_death_with_med, although technically we don't know whether individual accessed HSI or not. + # How finely do we want to really resolve RTI deaths? + self.sim.modules['Demography'].do_death(individual_id=person_id, cause="RTI_death_with_med", + originating_module=self.module) + +class RTI_NNResolution_Recovery_Event(Event, IndividualScopeEventMixin): + """This is an individual-level event that determines the end of the incindent for individual via death""" + def __init__(self, module, person_id): + super().__init__(module, person_id=person_id) + def apply(self, person_id): + df = self.sim.population.props + # Updat rt disability with long term outcome of accident + df.loc[person_id,'rt_disability'] = df.loc[person_id,'rt_disability_permanent'] + # Ensure that this person will be 'eligible' for rti injury again + df.loc[person_id,'rt_inj_severity'] = "none" + + class RTI_No_Lifesaving_Medical_Intervention_Death_Event(Event, IndividualScopeEventMixin): """This is the NoMedicalInterventionDeathEvent. It is scheduled by the MedicalInterventionEvent which determines the resources required to treat that person and if they aren't present, the person is sent here. This function is also From 84717cb6f1320a202b6d82170e7b10e06839dd05 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 11 Dec 2024 10:44:26 +0000 Subject: [PATCH 03/25] Although currently all people selected for an rti are assumed to experience the incident at the time of the polling, ensure that the end of the episode is calculated from the stored incident time in case this is updated in the future. --- src/tlo/methods/rti.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index a43dfa2f26..9f0b66c968 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2955,9 +2955,9 @@ def apply(self, population): if is_alive_after_RTI: # Store permanent disability incurred now to be accessed when Recovery Event is invoked. df.loc[person_id,'rt_disability_permanent'] = NN_model.loc[count,'rt_disability_permanent'] - self.sim.schedule_event(RTI_NNResolution_Recovery_Event(self.module, person_id), self.sim.date + DateOffset(days=duration_days)) + self.sim.schedule_event(RTI_NNResolution_Recovery_Event(self.module, person_id), df.loc[person_id, 'rt_date_inj'] + DateOffset(days=duration_days)) else: - self.sim.schedule_event(RTI_NNResolution_Death_Event(self.module, person_id), self.sim.date + DateOffset(days=duration_days)) + self.sim.schedule_event(RTI_NNResolution_Death_Event(self.module, person_id), df.loc[person_id, 'rt_date_inj'] + DateOffset(days=duration_days)) count += 1 From 2a622d1dab99aae0e97e4a82aa0c87b2c24d3085 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 13 Dec 2024 12:56:14 +0000 Subject: [PATCH 04/25] Style fixes --- src/tlo/methods/rti.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 9f0b66c968..7855353e1e 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2872,7 +2872,7 @@ def apply(self, population): df.loc[diedfromrtiidx, 'rt_debugging_DALY_wt'] = 0 df.loc[diedfromrtiidx, 'rt_in_shock'] = False # reset whether they have been selected for an injury this month - df['rt_road_traffic_inc'] = False + df.loc[diedfromrtiidx,'rt_road_traffic_inc'] = False # --------------------------------- UPDATING OF RTI OVER TIME ------------------------------------------------- # Currently we have the following conditions for being able to be involved in a road traffic injury, they are @@ -2922,7 +2922,7 @@ def apply(self, population): NN_model = self.sample_NN_model(len(selected_for_rti)) # HS USAGE - # Get the total number of different types of appts accessed as a result of this polling event and add to rolling count. + # Get the total number of different types of appts that will be accessed as a result of this polling event and add to rolling count. for column in self.sim.modules['RTI'].HS_Use_Type: self.sim.modules['RTI'].HS_Use_by_RTI[column] += NN_model[column].sum() # Sum all values in the column From f6d684a5b4248803ce7f84fa3a332a3173920bff Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 13 Dec 2024 16:36:48 +0000 Subject: [PATCH 05/25] Don't scheduled some of the regular events if using the emulator --- src/tlo/methods/healthsystem.py | 2 +- src/tlo/methods/rti.py | 21 ++++++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 4341eff6fd..a3baa7edff 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -2005,7 +2005,7 @@ def on_end_of_year(self) -> None: self._summary_counter._appts_by_level[level_number][category] += value # Reset emulator counter. - self.sim.modules['RTI'].HS_Use_by_RTI = Counter({col: 0 for col in module.HS_Use_Type}) + self.sim.modules['RTI'].HS_Use_by_RTI = Counter({col: 0 for col in self.sim.modules['RTI'].HS_Use_Type}) self._summary_counter.write_to_log_and_reset_counters() self.consumables.on_end_of_year() diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 7855353e1e..287c2742a6 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -25,6 +25,9 @@ from tlo.methods.hsi_generic_first_appts import HSIEventScheduler from tlo.population import IndividualProperties +# Decide whether to use emulator or not +use_emulator = True + # --------------------------------------------------------------------------------------------------------- # MODULE DEFINITIONS # --------------------------------------------------------------------------------------------------------- @@ -1537,12 +1540,17 @@ def initialise_simulation(self, sim): """ # Begin modelling road traffic injuries sim.schedule_event(RTIPollingEvent(self), sim.date + DateOffset(months=0)) - # Begin checking whether the persons injuries are healed - sim.schedule_event(RTI_Recovery_Event(self), sim.date + DateOffset(months=0)) - # Begin checking whether those with untreated injuries die - sim.schedule_event(RTI_Check_Death_No_Med(self), sim.date + DateOffset(months=0)) + + if use_emulator is False: + # Begin checking whether the persons injuries are healed + sim.schedule_event(RTI_Recovery_Event(self), sim.date + DateOffset(months=0)) + # Begin checking whether those with untreated injuries die + sim.schedule_event(RTI_Check_Death_No_Med(self), sim.date + DateOffset(months=0)) + # Begin logging the RTI events sim.schedule_event(RTI_Logging_Event(self), sim.date + DateOffset(months=1)) + + # Look-up consumable item codes self.look_up_consumable_item_codes() @@ -2814,7 +2822,7 @@ def __init__(self, module): def sample_NN_model(self, N): data = { - "is_alive_after_RTI": np.random.choice([True, False], size=N), + "is_alive_after_RTI": np.random.choice([True, False], size=N, p=[0.98, 0.02]), "duration_days": np.random.randint(0, 366, size=N), "rt_disability_average": np.random.uniform(0, 1, size=N), "rt_disability_permanent": np.random.uniform(0, 1, size=N), @@ -2911,8 +2919,7 @@ def apply(self, population): # Set the date that people were injured to now df.loc[selected_for_rti, 'rt_date_inj'] = now - # Decide whether to use emulator or not - use_emulator = True + if use_emulator: # This is where we want to replace normal course of events for RTI with emulator. From 563c03047953c6cede805dce4045c379728cec20 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 23 Dec 2024 09:25:53 +0100 Subject: [PATCH 06/25] Include CTGAN emulator and time duration of sim --- src/tlo/methods/rti.py | 48 ++++++++++++++++++++++++------------------ src/tlo/simulation.py | 5 ++++- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 287c2742a6..9e751607f2 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -21,6 +21,8 @@ from tlo.methods.hsi_generic_first_appts import GenericFirstAppointmentsMixin from tlo.methods.symptommanager import Symptom +from sdv.single_table import CTGANSynthesizer + if TYPE_CHECKING: from tlo.methods.hsi_generic_first_appts import HSIEventScheduler from tlo.population import IndividualProperties @@ -28,6 +30,7 @@ # Decide whether to use emulator or not use_emulator = True +emulator_path = '/Users/mm2908/Desktop/CTGAN/RTI_emulator.pkl' # --------------------------------------------------------------------------------------------------------- # MODULE DEFINITIONS # --------------------------------------------------------------------------------------------------------- @@ -66,6 +69,10 @@ def __init__(self, name=None, resourcefilepath=None): # Initialize the counter with all items set to 0 HS_Use_by_RTI = Counter({col: 0 for col in HS_Use_Type}) + + RTI_emulator = CTGANSynthesizer.load( + filepath=emulator_path + ) INJURY_INDICES = range(1, 9) @@ -2880,7 +2887,7 @@ def apply(self, population): df.loc[diedfromrtiidx, 'rt_debugging_DALY_wt'] = 0 df.loc[diedfromrtiidx, 'rt_in_shock'] = False # reset whether they have been selected for an injury this month - df.loc[diedfromrtiidx,'rt_road_traffic_inc'] = False + df['rt_road_traffic_inc'] = False # --------------------------------- UPDATING OF RTI OVER TIME ------------------------------------------------- # Currently we have the following conditions for being able to be involved in a road traffic injury, they are @@ -2926,8 +2933,9 @@ def apply(self, population): # First, sample outcomes for individuals which were selected_for_rti. # For now, don't consider properties of individual when sampling outcome. All we care about is the number of samples. - NN_model = self.sample_NN_model(len(selected_for_rti)) - + NN_model = self.sim.modules['RTI'].RTI_emulator.sample(len(selected_for_rti)) + #NN_model = self.sample_NN_model(len(selected_for_rti)) + # HS USAGE # Get the total number of different types of appts that will be accessed as a result of this polling event and add to rolling count. for column in self.sim.modules['RTI'].HS_Use_Type: @@ -2942,29 +2950,29 @@ def apply(self, population): duration_days = int(NN_model.loc[count,'duration_days']) # Individual experiences an immediate death - if is_alive_after_RTI is False and duration_days == 0: + # if is_alive_after_RTI is False and duration_days == 0: # Keep track of who experience pre-hospital mortality with the property rt_imm_death # Do we need to keep this? Could probably ignore if using emulator. - df.loc[person_id, 'rt_imm_death'] = True + # df.loc[person_id, 'rt_imm_death'] = True # For each person selected to experience pre-hospital mortality, schedule an InstantaneosDeath event - self.sim.modules['Demography'].do_death(individual_id=individual_id, cause="RTI_imm_death", - originating_module=self.module) + # self.sim.modules['Demography'].do_death(individual_id=individual_id, cause="RTI_imm_death", + # originating_module=self.module) # Else individual doesn't immediately die, therefore schedule resolution + # else: + # Set disability to what will be the average over duration of the episode + df.loc[person_id,'rt_disability'] = NN_model.loc[count,'rt_disability_average'] + + # Make sure this person is not 'discoverable' by polling event next month. + df.loc[person_id,'rt_inj_severity'] = 'mild' # instead of "none", but actually we don't know how severe it is + + # Schedule resolution + if is_alive_after_RTI: + # Store permanent disability incurred now to be accessed when Recovery Event is invoked. + df.loc[person_id,'rt_disability_permanent'] = NN_model.loc[count,'rt_disability_permanent'] + self.sim.schedule_event(RTI_NNResolution_Recovery_Event(self.module, person_id), df.loc[person_id, 'rt_date_inj'] + DateOffset(days=duration_days)) else: - # Set disability to what will be the average over duration of the episode - df.loc[person_id,'rt_disability'] = NN_model.loc[count,'rt_disability_average'] - - # Make sure this person is not 'discoverable' by polling event next month. - df.loc[person_id,'rt_inj_severity'] = 'mild' # instead of "none", but actually we don't know how severe it is - - # Schedule resolution - if is_alive_after_RTI: - # Store permanent disability incurred now to be accessed when Recovery Event is invoked. - df.loc[person_id,'rt_disability_permanent'] = NN_model.loc[count,'rt_disability_permanent'] - self.sim.schedule_event(RTI_NNResolution_Recovery_Event(self.module, person_id), df.loc[person_id, 'rt_date_inj'] + DateOffset(days=duration_days)) - else: - self.sim.schedule_event(RTI_NNResolution_Death_Event(self.module, person_id), df.loc[person_id, 'rt_date_inj'] + DateOffset(days=duration_days)) + self.sim.schedule_event(RTI_NNResolution_Death_Event(self.module, person_id), df.loc[person_id, 'rt_date_inj'] + DateOffset(days=duration_days)) count += 1 diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 547edf1d23..79d3ad90ef 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -342,7 +342,6 @@ def _update_progress_bar(self, progress_bar: ProgressBar, date: Date) -> None: def run_simulation_to(self, *, to_date: Date) -> None: """Run simulation up to a specified date. - Unlike :py:meth:`simulate` this method does not initialise or finalise simulation and the date simulated to can be any date before or equal to simulation end date. @@ -350,6 +349,8 @@ def run_simulation_to(self, *, to_date: Date) -> None: :param to_date: Date to simulate up to but not including - must be before or equal to simulation end date specified in call to :py:meth:`initialise`. """ + start_time = time.time() + if not self._initialised: msg = "Simulation must be initialised before calling run_simulation_to" raise SimulationNotInitialisedError(msg) @@ -368,6 +369,8 @@ def run_simulation_to(self, *, to_date: Date) -> None: self.date = to_date if self.show_progress_bar: progress_bar.stop() + end_time = time.time()- start_time + print("Time taken", end_time) def simulate(self, *, end_date: Date) -> None: """Simulate until the given end date From 76548ff0287f8a943e7b0b9542af985ef7d41691 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 23 Dec 2024 12:54:12 +0100 Subject: [PATCH 07/25] Use Gaussian emulator and print duration of run to file --- src/tlo/methods/rti.py | 39 ++++++++++++++++++++------------------- src/tlo/simulation.py | 4 +++- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 9e751607f2..6b46a50e37 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -22,6 +22,7 @@ from tlo.methods.symptommanager import Symptom from sdv.single_table import CTGANSynthesizer +from sdv.single_table import GaussianCopulaSynthesizer if TYPE_CHECKING: from tlo.methods.hsi_generic_first_appts import HSIEventScheduler @@ -70,7 +71,7 @@ def __init__(self, name=None, resourcefilepath=None): # Initialize the counter with all items set to 0 HS_Use_by_RTI = Counter({col: 0 for col in HS_Use_Type}) - RTI_emulator = CTGANSynthesizer.load( + RTI_emulator = GaussianCopulaSynthesizer.load( filepath=emulator_path ) @@ -2950,30 +2951,30 @@ def apply(self, population): duration_days = int(NN_model.loc[count,'duration_days']) # Individual experiences an immediate death - # if is_alive_after_RTI is False and duration_days == 0: + if is_alive_after_RTI is False and duration_days == 0: # Keep track of who experience pre-hospital mortality with the property rt_imm_death # Do we need to keep this? Could probably ignore if using emulator. - # df.loc[person_id, 'rt_imm_death'] = True + df.loc[person_id, 'rt_imm_death'] = True # For each person selected to experience pre-hospital mortality, schedule an InstantaneosDeath event - # self.sim.modules['Demography'].do_death(individual_id=individual_id, cause="RTI_imm_death", - # originating_module=self.module) + self.sim.modules['Demography'].do_death(individual_id=individual_id, cause="RTI_imm_death", + originating_module=self.module) # Else individual doesn't immediately die, therefore schedule resolution - # else: - # Set disability to what will be the average over duration of the episode - df.loc[person_id,'rt_disability'] = NN_model.loc[count,'rt_disability_average'] - - # Make sure this person is not 'discoverable' by polling event next month. - df.loc[person_id,'rt_inj_severity'] = 'mild' # instead of "none", but actually we don't know how severe it is - - # Schedule resolution - if is_alive_after_RTI: - # Store permanent disability incurred now to be accessed when Recovery Event is invoked. - df.loc[person_id,'rt_disability_permanent'] = NN_model.loc[count,'rt_disability_permanent'] - self.sim.schedule_event(RTI_NNResolution_Recovery_Event(self.module, person_id), df.loc[person_id, 'rt_date_inj'] + DateOffset(days=duration_days)) else: - self.sim.schedule_event(RTI_NNResolution_Death_Event(self.module, person_id), df.loc[person_id, 'rt_date_inj'] + DateOffset(days=duration_days)) - + # Set disability to what will be the average over duration of the episode + df.loc[person_id,'rt_disability'] = NN_model.loc[count,'rt_disability_average'] + + # Make sure this person is not 'discoverable' by polling event next month. + df.loc[person_id,'rt_inj_severity'] = 'mild' # instead of "none", but actually we don't know how severe it is + + # Schedule resolution + if is_alive_after_RTI: + # Store permanent disability incurred now to be accessed when Recovery Event is invoked. + df.loc[person_id,'rt_disability_permanent'] = NN_model.loc[count,'rt_disability_permanent'] + self.sim.schedule_event(RTI_NNResolution_Recovery_Event(self.module, person_id), df.loc[person_id, 'rt_date_inj'] + DateOffset(days=duration_days)) + else: + self.sim.schedule_event(RTI_NNResolution_Death_Event(self.module, person_id), df.loc[person_id, 'rt_date_inj'] + DateOffset(days=duration_days)) + count += 1 else: diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 79d3ad90ef..1af2c49477 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -370,7 +370,9 @@ def run_simulation_to(self, *, to_date: Date) -> None: if self.show_progress_bar: progress_bar.stop() end_time = time.time()- start_time - print("Time taken", end_time) + with open('duration_time.txt', 'a') as file: + # Append text to the file + file.write(f"Time taken, {end_time}\n") def simulate(self, *, end_date: Date) -> None: """Simulate until the given end date From 1b2d8f622073c209c7849c331e87ce1b70bfaac4 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 6 Jan 2025 10:32:23 +0100 Subject: [PATCH 08/25] Time duration --- src/tlo/methods/rti.py | 14 ++++++++++---- src/tlo/simulation.py | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 6b46a50e37..4a668bb99b 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -29,7 +29,7 @@ from tlo.population import IndividualProperties # Decide whether to use emulator or not -use_emulator = True +use_emulator = False emulator_path = '/Users/mm2908/Desktop/CTGAN/RTI_emulator.pkl' # --------------------------------------------------------------------------------------------------------- @@ -1049,6 +1049,14 @@ def __init__(self, name=None, resourcefilepath=None): # Define the module's parameters PROPERTIES = { + 'rt_road_traffic_inc': Property(Types.BOOL, 'involved in a road traffic injury'), + 'rt_inj_severity': Property(Types.CATEGORICAL, + 'Injury status relating to road traffic injury: none, mild, severe', + categories=['none', 'mild', 'severe'], + ), + 'rt_disability': Property(Types.REAL, 'disability weight for current month'), + 'rt_disability_permanent': Property(Types.REAL, 'disability weight incurred permanently'), + 'rt_date_inj': Property(Types.DATE, 'date of latest injury'), 'rt_road_traffic_inc': Property(Types.BOOL, 'involved in a road traffic injury'), 'rt_inj_severity': Property(Types.CATEGORICAL, 'Injury status relating to road traffic injury: none, mild, severe', @@ -1095,9 +1103,7 @@ def __init__(self, name=None, resourcefilepath=None): 'rt_unavailable_med_death': Property(Types.BOOL, 'death in the following month without medical intervention ' 'being able to be provided'), 'rt_recovery_no_med': Property(Types.BOOL, 'recovery without medical intervention True/False'), - 'rt_disability': Property(Types.REAL, 'disability weight for current month'), - 'rt_disability_permanent': Property(Types.REAL, 'disability weight incurred permanently'), - 'rt_date_inj': Property(Types.DATE, 'date of latest injury'), + 'rt_med_int': Property(Types.BOOL, 'whether this person is currently undergoing medical treatment'), 'rt_in_icu_or_hdu': Property(Types.BOOL, 'whether this person is currently in ICU for RTI'), 'rt_MAIS_military_score': Property(Types.INT, 'the maximum AIS-military score, used as a proxy to calculate the' diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 1af2c49477..5bca317535 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -373,6 +373,7 @@ def run_simulation_to(self, *, to_date: Date) -> None: with open('duration_time.txt', 'a') as file: # Append text to the file file.write(f"Time taken, {end_time}\n") + print("Time taken", end_time) def simulate(self, *, end_date: Date) -> None: """Simulate until the given end date From 240ddc5d04cb28d051b6c097b706ae18ee31301c Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 8 Jan 2025 11:05:40 +0000 Subject: [PATCH 09/25] Remove severity of accident from variables stored --- src/tlo/methods/rti.py | 390 ++++++++++++++++++++++------------------- 1 file changed, 208 insertions(+), 182 deletions(-) diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 4a668bb99b..ad1069d786 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -31,7 +31,7 @@ # Decide whether to use emulator or not use_emulator = False -emulator_path = '/Users/mm2908/Desktop/CTGAN/RTI_emulator.pkl' +emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/RTI_emulator.pkl' # --------------------------------------------------------------------------------------------------------- # MODULE DEFINITIONS # --------------------------------------------------------------------------------------------------------- @@ -1048,72 +1048,79 @@ def __init__(self, name=None, resourcefilepath=None): } # Define the module's parameters - PROPERTIES = { - 'rt_road_traffic_inc': Property(Types.BOOL, 'involved in a road traffic injury'), - 'rt_inj_severity': Property(Types.CATEGORICAL, - 'Injury status relating to road traffic injury: none, mild, severe', - categories=['none', 'mild', 'severe'], - ), - 'rt_disability': Property(Types.REAL, 'disability weight for current month'), - 'rt_disability_permanent': Property(Types.REAL, 'disability weight incurred permanently'), - 'rt_date_inj': Property(Types.DATE, 'date of latest injury'), - 'rt_road_traffic_inc': Property(Types.BOOL, 'involved in a road traffic injury'), - 'rt_inj_severity': Property(Types.CATEGORICAL, - 'Injury status relating to road traffic injury: none, mild, severe', - categories=['none', 'mild', 'severe'], - ), - **{ - f'rt_injury_{injury_index}': Property( - Types.CATEGORICAL, - f'Codes for injury {injury_index} from RTI', - categories=categories, - ) - # hacky solution to avoid issue that names defined in class scope are not - # accessible in scope of comprehension expresions _except for_ outermost - # iterator - see https://stackoverflow.com/a/13913933/4798943 - for injury_index, categories in zip( - INJURY_INDICES, [INJURY_CODES] * len(INJURY_INDICES) - ) - }, - **{ - f'rt_date_to_remove_daly_{injury_index}': Property( - Types.DATE, - f'Date to remove the daly weight for injury {injury_index}', - ) - for injury_index in INJURY_INDICES - }, - 'rt_in_shock': Property(Types.BOOL, 'A property determining if this person is in shock'), - 'rt_death_from_shock': Property(Types.BOOL, 'whether this person died from shock'), - 'rt_injuries_to_cast': Property(Types.LIST, 'A list of injuries that are to be treated with casts'), - 'rt_injuries_for_minor_surgery': Property(Types.LIST, 'A list of injuries that are to be treated with a minor' - 'surgery'), - 'rt_injuries_for_major_surgery': Property(Types.LIST, 'A list of injuries that are to be treated with a minor' - 'surgery'), - 'rt_injuries_to_heal_with_time': Property(Types.LIST, 'A list of injuries that heal without further treatment'), - 'rt_injuries_for_open_fracture_treatment': Property(Types.LIST, 'A list of injuries that with open fracture ' - 'treatment'), - 'rt_ISS_score': Property(Types.INT, 'The ISS score associated with the injuries resulting from a road traffic' - 'accident'), - 'rt_perm_disability': Property(Types.BOOL, 'whether the injuries from an RTI result in permanent disability'), - 'rt_polytrauma': Property(Types.BOOL, 'polytrauma from RTI'), - 'rt_imm_death': Property(Types.BOOL, 'death at scene True/False'), - 'rt_diagnosed': Property(Types.BOOL, 'Person has had their injuries diagnosed'), - 'rt_post_med_death': Property(Types.BOOL, 'death in following month despite medical intervention True/False'), - 'rt_no_med_death': Property(Types.BOOL, 'death in following month without medical intervention True/False'), - 'rt_unavailable_med_death': Property(Types.BOOL, 'death in the following month without medical intervention ' - 'being able to be provided'), - 'rt_recovery_no_med': Property(Types.BOOL, 'recovery without medical intervention True/False'), - - 'rt_med_int': Property(Types.BOOL, 'whether this person is currently undergoing medical treatment'), - 'rt_in_icu_or_hdu': Property(Types.BOOL, 'whether this person is currently in ICU for RTI'), - 'rt_MAIS_military_score': Property(Types.INT, 'the maximum AIS-military score, used as a proxy to calculate the' - 'probability of mortality without medical intervention'), - 'rt_date_death_no_med': Property(Types.DATE, 'the date which the person has is scheduled to die without medical' - 'intervention'), - 'rt_debugging_DALY_wt': Property(Types.REAL, 'The true value of the DALY weight burden'), - 'rt_injuries_left_untreated': Property(Types.LIST, 'A list of injuries that have been left untreated due to a ' - 'blocked intervention') - } + if use_emulator: + PROPERTIES = { + 'rt_disability': Property(Types.REAL, 'disability weight for current month'), + 'rt_disability_permanent': Property(Types.REAL, 'disability weight incurred permanently'), + 'rt_date_inj': Property(Types.DATE, 'date of latest injury'), + 'rt_road_traffic_inc': Property(Types.BOOL, 'involved in a road traffic injury'), + #'rt_inj_severity': Property(Types.CATEGORICAL, + # 'Injury status relating to road traffic injury: none, mild, severe', + # categories=['none', 'mild', 'severe'], + # ), + } + else: + PROPERTIES = { + 'rt_disability': Property(Types.REAL, 'disability weight for current month'), + 'rt_disability_permanent': Property(Types.REAL, 'disability weight incurred permanently'), + 'rt_date_inj': Property(Types.DATE, 'date of latest injury'), + 'rt_road_traffic_inc': Property(Types.BOOL, 'involved in a road traffic injury'), + 'rt_inj_severity': Property(Types.CATEGORICAL, + 'Injury status relating to road traffic injury: none, mild, severe', + categories=['none', 'mild', 'severe'], + ), + **{ + f'rt_injury_{injury_index}': Property( + Types.CATEGORICAL, + f'Codes for injury {injury_index} from RTI', + categories=categories, + ) + # hacky solution to avoid issue that names defined in class scope are not + # accessible in scope of comprehension expresions _except for_ outermost + # iterator - see https://stackoverflow.com/a/13913933/4798943 + for injury_index, categories in zip( + INJURY_INDICES, [INJURY_CODES] * len(INJURY_INDICES) + ) + }, + **{ + f'rt_date_to_remove_daly_{injury_index}': Property( + Types.DATE, + f'Date to remove the daly weight for injury {injury_index}', + ) + for injury_index in INJURY_INDICES + }, + 'rt_in_shock': Property(Types.BOOL, 'A property determining if this person is in shock'), + 'rt_death_from_shock': Property(Types.BOOL, 'whether this person died from shock'), + 'rt_injuries_to_cast': Property(Types.LIST, 'A list of injuries that are to be treated with casts'), + 'rt_injuries_for_minor_surgery': Property(Types.LIST, 'A list of injuries that are to be treated with a minor' + 'surgery'), + 'rt_injuries_for_major_surgery': Property(Types.LIST, 'A list of injuries that are to be treated with a minor' + 'surgery'), + 'rt_injuries_to_heal_with_time': Property(Types.LIST, 'A list of injuries that heal without further treatment'), + 'rt_injuries_for_open_fracture_treatment': Property(Types.LIST, 'A list of injuries that with open fracture ' + 'treatment'), + 'rt_ISS_score': Property(Types.INT, 'The ISS score associated with the injuries resulting from a road traffic' + 'accident'), + 'rt_perm_disability': Property(Types.BOOL, 'whether the injuries from an RTI result in permanent disability'), + 'rt_polytrauma': Property(Types.BOOL, 'polytrauma from RTI'), + 'rt_imm_death': Property(Types.BOOL, 'death at scene True/False'), + 'rt_diagnosed': Property(Types.BOOL, 'Person has had their injuries diagnosed'), + 'rt_post_med_death': Property(Types.BOOL, 'death in following month despite medical intervention True/False'), + 'rt_no_med_death': Property(Types.BOOL, 'death in following month without medical intervention True/False'), + 'rt_unavailable_med_death': Property(Types.BOOL, 'death in the following month without medical intervention ' + 'being able to be provided'), + 'rt_recovery_no_med': Property(Types.BOOL, 'recovery without medical intervention True/False'), + + 'rt_med_int': Property(Types.BOOL, 'whether this person is currently undergoing medical treatment'), + 'rt_in_icu_or_hdu': Property(Types.BOOL, 'whether this person is currently in ICU for RTI'), + 'rt_MAIS_military_score': Property(Types.INT, 'the maximum AIS-military score, used as a proxy to calculate the' + 'probability of mortality without medical intervention'), + 'rt_date_death_no_med': Property(Types.DATE, 'the date which the person has is scheduled to die without medical' + 'intervention'), + 'rt_debugging_DALY_wt': Property(Types.REAL, 'The true value of the DALY weight burden'), + 'rt_injuries_left_untreated': Property(Types.LIST, 'A list of injuries that have been left untreated due to a ' + 'blocked intervention') + } # Declare Metadata METADATA = { @@ -1505,38 +1512,46 @@ def initialise_population(self, population): for the RTI module is that people haven't been involved in a road traffic accident and are therefor alive and healthy.""" df = population.props - df.loc[df.is_alive, 'rt_road_traffic_inc'] = False - df.loc[df.is_alive, 'rt_inj_severity'] = "none" # default: no one has been injured in a RTI - for injury_index in RTI.INJURY_INDICES: - df.loc[df.is_alive, f'rt_injury_{injury_index}'] = "none" - df.loc[df.is_alive, f'rt_date_to_remove_daly_{injury_index}'] = pd.NaT - df.loc[df.is_alive, 'rt_in_shock'] = False - df.loc[df.is_alive, 'rt_death_from_shock'] = False - df.loc[df.is_alive, 'rt_polytrauma'] = False - df.loc[df.is_alive, 'rt_ISS_score'] = 0 - df.loc[df.is_alive, 'rt_perm_disability'] = False - df.loc[df.is_alive, 'rt_imm_death'] = False # default: no one is dead on scene of crash - df.loc[df.is_alive, 'rt_diagnosed'] = False - df.loc[df.is_alive, 'rt_recovery_no_med'] = False # default: no recovery without medical intervention - df.loc[df.is_alive, 'rt_post_med_death'] = False # default: no death after medical intervention - df.loc[df.is_alive, 'rt_no_med_death'] = False - df.loc[df.is_alive, 'rt_unavailable_med_death'] = False - df.loc[df.is_alive, 'rt_disability'] = 0 # default: no DALY - df.loc[df.is_alive, 'rt_disability_permanent'] = 0 # default: no DALY - df.loc[df.is_alive, 'rt_date_inj'] = pd.NaT - df.loc[df.is_alive, 'rt_med_int'] = False - df.loc[df.is_alive, 'rt_in_icu_or_hdu'] = False - df.loc[df.is_alive, 'rt_MAIS_military_score'] = 0 - df.loc[df.is_alive, 'rt_date_death_no_med'] = pd.NaT - df.loc[df.is_alive, 'rt_debugging_DALY_wt'] = 0 - alive_count = sum(df.is_alive) - df.loc[df.is_alive, 'rt_injuries_to_cast'] = pd.Series([[] for _ in range(alive_count)]) - df.loc[df.is_alive, 'rt_injuries_for_minor_surgery'] = pd.Series([[] for _ in range(alive_count)]) - df.loc[df.is_alive, 'rt_injuries_for_major_surgery'] = pd.Series([[] for _ in range(alive_count)]) - df.loc[df.is_alive, 'rt_injuries_to_heal_with_time'] = pd.Series([[] for _ in range(alive_count)]) - df.loc[df.is_alive, 'rt_injuries_for_open_fracture_treatment'] = pd.Series([[] for _ in range(alive_count)]) - df.loc[df.is_alive, 'rt_injuries_left_untreated'] = pd.Series([[] for _ in range(alive_count)]) + + if use_emulator: + df.loc[df.is_alive, 'rt_disability'] = 0 # default: no DALY + df.loc[df.is_alive, 'rt_disability_permanent'] = 0 # default: no DALY + df.loc[df.is_alive, 'rt_road_traffic_inc'] = False + df.loc[df.is_alive, 'rt_date_inj'] = pd.NaT + else: + df.loc[df.is_alive, 'rt_disability'] = 0 # default: no DALY + df.loc[df.is_alive, 'rt_disability_permanent'] = 0 # default: no DALY + df.loc[df.is_alive, 'rt_road_traffic_inc'] = False + df.loc[df.is_alive, 'rt_inj_severity'] = "none" # default: no one has been injured in a RTI + df.loc[df.is_alive, 'rt_date_inj'] = pd.NaT + for injury_index in RTI.INJURY_INDICES: + df.loc[df.is_alive, f'rt_injury_{injury_index}'] = "none" + df.loc[df.is_alive, f'rt_date_to_remove_daly_{injury_index}'] = pd.NaT + df.loc[df.is_alive, 'rt_in_shock'] = False + df.loc[df.is_alive, 'rt_death_from_shock'] = False + df.loc[df.is_alive, 'rt_polytrauma'] = False + df.loc[df.is_alive, 'rt_ISS_score'] = 0 + df.loc[df.is_alive, 'rt_perm_disability'] = False + df.loc[df.is_alive, 'rt_imm_death'] = False # default: no one is dead on scene of crash + df.loc[df.is_alive, 'rt_diagnosed'] = False + df.loc[df.is_alive, 'rt_recovery_no_med'] = False # default: no recovery without medical intervention + df.loc[df.is_alive, 'rt_post_med_death'] = False # default: no death after medical intervention + df.loc[df.is_alive, 'rt_no_med_death'] = False + df.loc[df.is_alive, 'rt_unavailable_med_death'] = False + df.loc[df.is_alive, 'rt_med_int'] = False + df.loc[df.is_alive, 'rt_in_icu_or_hdu'] = False + df.loc[df.is_alive, 'rt_MAIS_military_score'] = 0 + df.loc[df.is_alive, 'rt_date_death_no_med'] = pd.NaT + df.loc[df.is_alive, 'rt_debugging_DALY_wt'] = 0 + alive_count = sum(df.is_alive) + df.loc[df.is_alive, 'rt_injuries_to_cast'] = pd.Series([[] for _ in range(alive_count)]) + df.loc[df.is_alive, 'rt_injuries_for_minor_surgery'] = pd.Series([[] for _ in range(alive_count)]) + df.loc[df.is_alive, 'rt_injuries_for_major_surgery'] = pd.Series([[] for _ in range(alive_count)]) + df.loc[df.is_alive, 'rt_injuries_to_heal_with_time'] = pd.Series([[] for _ in range(alive_count)]) + df.loc[df.is_alive, 'rt_injuries_for_open_fracture_treatment'] = pd.Series([[] for _ in range(alive_count)]) + df.loc[df.is_alive, 'rt_injuries_left_untreated'] = pd.Series([[] for _ in range(alive_count)]) + def initialise_simulation(self, sim): """At the start of the simulation we schedule a logging event, which records the relevant information regarding road traffic injuries in the last month. @@ -1553,16 +1568,16 @@ def initialise_simulation(self, sim): haven't then it asks whether they should die away from their injuries """ # Begin modelling road traffic injuries - sim.schedule_event(RTIPollingEvent(self), sim.date + DateOffset(months=0)) + #sim.schedule_event(RTIPollingEvent(self), sim.date + DateOffset(months=0)) - if use_emulator is False: + #if use_emulator is False: # Begin checking whether the persons injuries are healed - sim.schedule_event(RTI_Recovery_Event(self), sim.date + DateOffset(months=0)) + # sim.schedule_event(RTI_Recovery_Event(self), sim.date + DateOffset(months=0)) # Begin checking whether those with untreated injuries die - sim.schedule_event(RTI_Check_Death_No_Med(self), sim.date + DateOffset(months=0)) + # sim.schedule_event(RTI_Check_Death_No_Med(self), sim.date + DateOffset(months=0)) # Begin logging the RTI events - sim.schedule_event(RTI_Logging_Event(self), sim.date + DateOffset(months=1)) + # sim.schedule_event(RTI_Logging_Event(self), sim.date + DateOffset(months=1)) # Look-up consumable item codes @@ -2304,35 +2319,42 @@ def on_birth(self, mother_id, child_id): :return: n/a """ df = self.sim.population.props - df.at[child_id, 'rt_road_traffic_inc'] = False - df.at[child_id, 'rt_inj_severity'] = "none" # default: no one has been injured in a RTI - for injury_index in RTI.INJURY_INDICES: - df.at[child_id, f'rt_injury_{injury_index}'] = "none" - df.at[child_id, f'rt_date_to_remove_daly_{injury_index}'] = pd.NaT - df.at[child_id, 'rt_in_shock'] = False - df.at[child_id, 'rt_death_from_shock'] = False - df.at[child_id, 'rt_injuries_to_cast'] = [] - df.at[child_id, 'rt_injuries_for_minor_surgery'] = [] - df.at[child_id, 'rt_injuries_for_major_surgery'] = [] - df.at[child_id, 'rt_injuries_to_heal_with_time'] = [] - df.at[child_id, 'rt_injuries_for_open_fracture_treatment'] = [] - df.at[child_id, 'rt_polytrauma'] = False - df.at[child_id, 'rt_ISS_score'] = 0 - df.at[child_id, 'rt_imm_death'] = False - df.at[child_id, 'rt_perm_disability'] = False - df.at[child_id, 'rt_med_int'] = False # default: no one has a had medical intervention - df.at[child_id, 'rt_in_icu_or_hdu'] = False - df.at[child_id, 'rt_diagnosed'] = False - df.at[child_id, 'rt_recovery_no_med'] = False # default: no recovery without medical intervention - df.at[child_id, 'rt_post_med_death'] = False # default: no death after medical intervention - df.at[child_id, 'rt_no_med_death'] = False - df.at[child_id, 'rt_unavailable_med_death'] = False - df.at[child_id, 'rt_disability'] = 0 # default: no disability due to RTI - df.at[child_id, 'rt_date_inj'] = pd.NaT - df.at[child_id, 'rt_MAIS_military_score'] = 0 - df.at[child_id, 'rt_date_death_no_med'] = pd.NaT - df.at[child_id, 'rt_debugging_DALY_wt'] = 0 - df.at[child_id, 'rt_injuries_left_untreated'] = [] + + if use_emulator: + df.at[child_id, 'rt_disability'] = 0 # default: no DALY + df.at[child_id, 'rt_disability_permanent'] = 0 # default: no DALY + df.at[child_id, 'rt_road_traffic_inc'] = False + df.at[child_id, 'rt_date_inj'] = pd.NaT + else: + df.at[child_id, 'rt_road_traffic_inc'] = False + df.at[child_id, 'rt_inj_severity'] = "none" # default: no one has been injured in a RTI + for injury_index in RTI.INJURY_INDICES: + df.at[child_id, f'rt_injury_{injury_index}'] = "none" + df.at[child_id, f'rt_date_to_remove_daly_{injury_index}'] = pd.NaT + df.at[child_id, 'rt_in_shock'] = False + df.at[child_id, 'rt_death_from_shock'] = False + df.at[child_id, 'rt_injuries_to_cast'] = [] + df.at[child_id, 'rt_injuries_for_minor_surgery'] = [] + df.at[child_id, 'rt_injuries_for_major_surgery'] = [] + df.at[child_id, 'rt_injuries_to_heal_with_time'] = [] + df.at[child_id, 'rt_injuries_for_open_fracture_treatment'] = [] + df.at[child_id, 'rt_polytrauma'] = False + df.at[child_id, 'rt_ISS_score'] = 0 + df.at[child_id, 'rt_imm_death'] = False + df.at[child_id, 'rt_perm_disability'] = False + df.at[child_id, 'rt_med_int'] = False # default: no one has a had medical intervention + df.at[child_id, 'rt_in_icu_or_hdu'] = False + df.at[child_id, 'rt_diagnosed'] = False + df.at[child_id, 'rt_recovery_no_med'] = False # default: no recovery without medical intervention + df.at[child_id, 'rt_post_med_death'] = False # default: no death after medical intervention + df.at[child_id, 'rt_no_med_death'] = False + df.at[child_id, 'rt_unavailable_med_death'] = False + df.at[child_id, 'rt_disability'] = 0 # default: no disability due to RTI + df.at[child_id, 'rt_date_inj'] = pd.NaT + df.at[child_id, 'rt_MAIS_military_score'] = 0 + df.at[child_id, 'rt_date_death_no_med'] = pd.NaT + df.at[child_id, 'rt_debugging_DALY_wt'] = 0 + df.at[child_id, 'rt_injuries_left_untreated'] = [] def look_up_consumable_item_codes(self): """Look up the item codes that used in the HSI in the module""" @@ -2864,47 +2886,51 @@ def apply(self, population): """ df = population.props now = self.sim.date - # Reset injury properties after death, get an index of people who have died due to RTI, all causes - diedfromrtiidx = df.index[df.rt_imm_death | df.rt_post_med_death | df.rt_no_med_death | df.rt_death_from_shock | - df.rt_unavailable_med_death] - df.loc[diedfromrtiidx, "rt_imm_death"] = False - df.loc[diedfromrtiidx, "rt_post_med_death"] = False - df.loc[diedfromrtiidx, "rt_no_med_death"] = False - df.loc[diedfromrtiidx, "rt_unavailable_med_death"] = False - df.loc[diedfromrtiidx, "rt_disability"] = 0 - df.loc[diedfromrtiidx, "rt_med_int"] = False - df.loc[diedfromrtiidx, 'rt_in_icu_or_hdu'] = False - for index, row in df.loc[diedfromrtiidx].iterrows(): - df.at[index, 'rt_injuries_to_cast'] = [] - df.at[index, 'rt_injuries_for_minor_surgery'] = [] - df.at[index, 'rt_injuries_for_major_surgery'] = [] - df.at[index, 'rt_injuries_to_heal_with_time'] = [] - df.at[index, 'rt_injuries_for_open_fracture_treatment'] = [] - df.at[index, 'rt_injuries_left_untreated'] = [] - df.loc[diedfromrtiidx, "rt_diagnosed"] = False - df.loc[diedfromrtiidx, "rt_polytrauma"] = False - df.loc[diedfromrtiidx, "rt_inj_severity"] = "none" - df.loc[diedfromrtiidx, "rt_perm_disability"] = False - for injury_column in RTI.INJURY_COLUMNS: - df.loc[diedfromrtiidx, injury_column] = "none" - for date_to_remove_daly_column in RTI.DATE_TO_REMOVE_DALY_COLUMNS: - df.loc[diedfromrtiidx, date_to_remove_daly_column] = pd.NaT - df.loc[diedfromrtiidx, 'rt_date_death_no_med'] = pd.NaT - df.loc[diedfromrtiidx, 'rt_MAIS_military_score'] = 0 - df.loc[diedfromrtiidx, 'rt_debugging_DALY_wt'] = 0 - df.loc[diedfromrtiidx, 'rt_in_shock'] = False - # reset whether they have been selected for an injury this month - df['rt_road_traffic_inc'] = False - - # --------------------------------- UPDATING OF RTI OVER TIME ------------------------------------------------- - # Currently we have the following conditions for being able to be involved in a road traffic injury, they are - # alive, they aren't currently injured, they didn't die immediately in - # a road traffic injury in the last month and finally, they aren't currently being treated for a road traffic - # injury. - rt_current_non_ind = df.index[df.is_alive & ~df.rt_road_traffic_inc & ~df.rt_imm_death & ~df.rt_med_int & - (df.rt_inj_severity == "none")] - - # ========= Update for people currently not involved in a RTI, make some involved in a RTI event ============== + + if use_emulator: + rt_current_non_ind = df.index[df.is_alive & ~df.rt_road_traffic_inc] + else: + # Reset injury properties after death, get an index of people who have died due to RTI, all causes + diedfromrtiidx = df.index[df.rt_imm_death | df.rt_post_med_death | df.rt_no_med_death | df.rt_death_from_shock | + df.rt_unavailable_med_death] + df.loc[diedfromrtiidx, "rt_imm_death"] = False + df.loc[diedfromrtiidx, "rt_post_med_death"] = False + df.loc[diedfromrtiidx, "rt_no_med_death"] = False + df.loc[diedfromrtiidx, "rt_unavailable_med_death"] = False + df.loc[diedfromrtiidx, "rt_disability"] = 0 + df.loc[diedfromrtiidx, "rt_med_int"] = False + df.loc[diedfromrtiidx, 'rt_in_icu_or_hdu'] = False + for index, row in df.loc[diedfromrtiidx].iterrows(): + df.at[index, 'rt_injuries_to_cast'] = [] + df.at[index, 'rt_injuries_for_minor_surgery'] = [] + df.at[index, 'rt_injuries_for_major_surgery'] = [] + df.at[index, 'rt_injuries_to_heal_with_time'] = [] + df.at[index, 'rt_injuries_for_open_fracture_treatment'] = [] + df.at[index, 'rt_injuries_left_untreated'] = [] + df.loc[diedfromrtiidx, "rt_diagnosed"] = False + df.loc[diedfromrtiidx, "rt_polytrauma"] = False + df.loc[diedfromrtiidx, "rt_inj_severity"] = "none" + df.loc[diedfromrtiidx, "rt_perm_disability"] = False + for injury_column in RTI.INJURY_COLUMNS: + df.loc[diedfromrtiidx, injury_column] = "none" + for date_to_remove_daly_column in RTI.DATE_TO_REMOVE_DALY_COLUMNS: + df.loc[diedfromrtiidx, date_to_remove_daly_column] = pd.NaT + df.loc[diedfromrtiidx, 'rt_date_death_no_med'] = pd.NaT + df.loc[diedfromrtiidx, 'rt_MAIS_military_score'] = 0 + df.loc[diedfromrtiidx, 'rt_debugging_DALY_wt'] = 0 + df.loc[diedfromrtiidx, 'rt_in_shock'] = False + # reset whether they have been selected for an injury this month + df['rt_road_traffic_inc'] = False + + # --------------------------------- UPDATING OF RTI OVER TIME ------------------------------------------------- + # Currently we have the following conditions for being able to be involved in a road traffic injury, they are + # alive, they aren't currently injured, they didn't die immediately in + # a road traffic injury in the last month and finally, they aren't currently being treated for a road traffic + # injury. + rt_current_non_ind = df.index[df.is_alive & ~df.rt_road_traffic_inc & ~df.rt_imm_death & ~df.rt_med_int & + (df.rt_inj_severity == "none")] + + # ========= Update for people currently not involved in a RTI, make some involved in a RTI event ============== # Use linear model helper class eq = LinearModel(LinearModelType.MULTIPLICATIVE, self.base_1m_prob_rti, @@ -2927,7 +2953,7 @@ def apply(self, population): pred = eq.predict(df.loc[rt_current_non_ind]) random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) selected_for_rti = rt_current_non_ind[pred > random_draw_in_rti] - + # Update to say they have been involved in a rti df.loc[selected_for_rti, 'rt_road_traffic_inc'] = True # Set the date that people were injured to now @@ -2951,27 +2977,27 @@ def apply(self, population): # Change current properties of the individual and schedule resolution of event. count = 0 for person_id in selected_for_rti: - + + # These properties are determined by the NN sampling is_alive_after_RTI = NN_model.loc[count,'is_alive_after_RTI'] + # Why does this require an int wrapper to work with DateOffset? duration_days = int(NN_model.loc[count,'duration_days']) # Individual experiences an immediate death if is_alive_after_RTI is False and duration_days == 0: - # Keep track of who experience pre-hospital mortality with the property rt_imm_death - # Do we need to keep this? Could probably ignore if using emulator. - df.loc[person_id, 'rt_imm_death'] = True + # For each person selected to experience pre-hospital mortality, schedule an InstantaneosDeath event self.sim.modules['Demography'].do_death(individual_id=individual_id, cause="RTI_imm_death", originating_module=self.module) # Else individual doesn't immediately die, therefore schedule resolution else: - # Set disability to what will be the average over duration of the episode + # Set disability to what will be the average over duration of the episodef df.loc[person_id,'rt_disability'] = NN_model.loc[count,'rt_disability_average'] - + # Make sure this person is not 'discoverable' by polling event next month. - df.loc[person_id,'rt_inj_severity'] = 'mild' # instead of "none", but actually we don't know how severe it is + #df.loc[person_id,'rt_inj_severity'] = 'mild' # instead of "none", but actually we don't know how severe it is # Schedule resolution if is_alive_after_RTI: @@ -5491,7 +5517,7 @@ def apply(self, person_id): # Updat rt disability with long term outcome of accident df.loc[person_id,'rt_disability'] = df.loc[person_id,'rt_disability_permanent'] # Ensure that this person will be 'eligible' for rti injury again - df.loc[person_id,'rt_inj_severity'] = "none" + df.loc[person_id,'rt_road_traffic_inc'] = False class RTI_No_Lifesaving_Medical_Intervention_Death_Event(Event, IndividualScopeEventMixin): From e5df08b1bcb2ebdb3f6a54e140ad4b44865ab640 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 13 Jan 2025 16:31:33 +0000 Subject: [PATCH 10/25] Add alternative genAI option [skip ci] --- .../analysis_extract_data.py | 317 ++++++++++ .../scenario_impact_of_actual_vs_funded.py | 117 ++++ .../rti_emulator/analysis_extract_data.py | 549 ++++++++++++++++++ .../scenario_test_rti_emulator.py | 153 +++++ src/tlo/methods/healthsystem.py | 45 +- src/tlo/methods/rti.py | 20 +- 6 files changed, 1172 insertions(+), 29 deletions(-) create mode 100644 src/scripts/healthsystem/impact_of_actual_vs_funded/analysis_extract_data.py create mode 100644 src/scripts/healthsystem/impact_of_actual_vs_funded/scenario_impact_of_actual_vs_funded.py create mode 100644 src/scripts/rti_emulator/analysis_extract_data.py create mode 100644 src/scripts/rti_emulator/scenario_test_rti_emulator.py diff --git a/src/scripts/healthsystem/impact_of_actual_vs_funded/analysis_extract_data.py b/src/scripts/healthsystem/impact_of_actual_vs_funded/analysis_extract_data.py new file mode 100644 index 0000000000..4f2f0b8f35 --- /dev/null +++ b/src/scripts/healthsystem/impact_of_actual_vs_funded/analysis_extract_data.py @@ -0,0 +1,317 @@ +"""Produce plots to show the health impact (deaths, dalys) each the healthcare system (overall health impact) when +running under different MODES and POLICIES (scenario_impact_of_actual_vs_funded.py)""" + +# short tclose -> ideal case +# long tclose -> status quo +import argparse +from pathlib import Path +from typing import Tuple + +import numpy as np +import pandas as pd +from matplotlib import pyplot as plt +from matplotlib.colors import Normalize +from scipy.optimize import curve_fit + +from tlo import Date +from tlo.analysis.utils import extract_results, summarize +from tlo.analysis.life_expectancy import get_life_expectancy_estimates + + +# Range of years considered +min_year = 2010 +max_year = 2040 + + +def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ): + """Produce standard set of plots describing the effect of each TREATMENT_ID. + - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. + - We estimate the draw on healthcare system resources as the FEWER appointments when that treatment does not occur. + """ + + TARGET_PERIOD = (Date(min_year, 1, 1), Date(max_year, 1, 1)) + + # Definitions of general helper functions + make_graph_file_name = lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 + + def target_period() -> str: + """Returns the target period as a string of the form YYYY-YYYY""" + return "-".join(str(t.year) for t in TARGET_PERIOD) + + def get_parameter_names_from_scenario_file() -> Tuple[str]: + """Get the tuple of names of the scenarios from `Scenario` class used to create the results.""" + from scripts.healthsystem.impact_of_actual_vs_funded.scenario_impact_of_actual_vs_funded import ( + ImpactOfHealthSystemMode, + ) + e = ImpactOfHealthSystemMode() + return tuple(e._scenarios.keys()) + + def get_num_deaths(_df): + """Return total number of Deaths (total within the TARGET_PERIOD) + """ + return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)])) + + def get_num_dalys(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + def get_num_dalys_by_cause(_df): + """Return number of DALYs by cause by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + + def set_param_names_as_column_index_level_0(_df): + """Set the columns index (level 0) as the param_names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] + assert len(names_of_cols_level0) == len(_df.columns.levels[0]) + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df + + def find_difference_relative_to_comparison(_ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. + The comparison is `X - COMPARISON`.""" + return _ser \ + .unstack(level=0) \ + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ + .drop(columns=([comparison] if drop_comparison else [])) \ + .stack() + + + def get_counts_of_hsi_by_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + + year_target = 2023 + def get_counts_of_hsi_by_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).dt.year ==year_target, 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + + def get_counts_of_hsi_by_short_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + def get_counts_of_hsi_by_short_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id_by_year(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + + # Obtain parameter names for this scenario file + param_names = get_parameter_names_from_scenario_file() + print(param_names) + + # ================================================================================================ + # TIME EVOLUTION OF TOTAL DALYs + # Plot DALYs averted compared to the ``No Policy'' policy + + year_target = 2023 # This global variable will be passed to custom function + def get_num_dalys_by_year(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + this_min_year = 2010 + for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'index_original']) + concatenated_df = concatenated_df.reset_index(level='index_original',drop=True) + dalys_by_year = concatenated_df + print(dalys_by_year) + dalys_by_year.to_csv('ConvertedOutputs/Total_DALYs_with_time.csv', index=True) + + # ================================================================================================ + # Print population under each scenario + pop_model = extract_results(results_folder, + module="tlo.methods.demography", + key="population", + column="total", + index="date", + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + pop_model.index = pop_model.index.year + pop_model = pop_model[(pop_model.index >= this_min_year) & (pop_model.index <= max_year)] + print(pop_model) + assert dalys_by_year.index.equals(pop_model.index) + assert all(dalys_by_year.columns == pop_model.columns) + pop_model.to_csv('ConvertedOutputs/Population_with_time.csv', index=True) + + # ================================================================================================ + # DALYs BROKEN DOWN BY CAUSES AND YEAR + # DALYs by cause per year + # %% Quantify the health losses associated with all interventions combined. + + year_target = 2023 # This global variable will be passed to custom function + def get_num_dalys_by_year_and_cause(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + this_min_year = 2010 + for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_year_and_cause, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year #summarize(num_dalys_by_year) + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + + df_total = concatenated_df + df_total.to_csv('ConvertedOutputs/DALYS_by_cause_with_time.csv', index=True) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + for year in range(min_year, max_year+1): + year_target = year + + hsi_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_delivered_by_year + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + HSI_ran_by_year = concatenated_df + + del ALL + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + for year in range(min_year, max_year+1): + year_target = year + + hsi_not_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Never_ran_HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_not_delivered_by_year + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + HSI_never_ran_by_year = concatenated_df + + HSI_never_ran_by_year = HSI_never_ran_by_year.fillna(0) #clean_df( + HSI_ran_by_year = HSI_ran_by_year.fillna(0) + HSI_total_by_year = HSI_ran_by_year.add(HSI_never_ran_by_year, fill_value=0) + HSI_ran_by_year.to_csv('ConvertedOutputs/HSIs_ran_by_area_with_time.csv', index=True) + HSI_never_ran_by_year.to_csv('ConvertedOutputs/HSIs_never_ran_by_area_with_time.csv', index=True) + print(HSI_ran_by_year) + print(HSI_never_ran_by_year) + print(HSI_total_by_year) + +if __name__ == "__main__": + rfp = Path('resources') + + parser = argparse.ArgumentParser( + description="Produce plots to show the impact each set of treatments", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--output-path", + help=( + "Directory to write outputs to. If not specified (set to None) outputs " + "will be written to value of --results-path argument." + ), + type=Path, + default=None, + required=False, + ) + parser.add_argument( + "--resources-path", + help="Directory containing resource files", + type=Path, + default=Path('resources'), + required=False, + ) + parser.add_argument( + "--results-path", + type=Path, + help=( + "Directory containing results from running " + "src/scripts/healthsystem/impact_of_actual_vs_funded/scenario_impact_of_actual_vs_funded.py " + ), + default=None, + required=False + ) + args = parser.parse_args() + assert args.results_path is not None + results_path = args.results_path + + output_path = results_path if args.output_path is None else args.output_path + + apply( + results_folder=results_path, + output_folder=output_path, + resourcefilepath=args.resources_path + ) diff --git a/src/scripts/healthsystem/impact_of_actual_vs_funded/scenario_impact_of_actual_vs_funded.py b/src/scripts/healthsystem/impact_of_actual_vs_funded/scenario_impact_of_actual_vs_funded.py new file mode 100644 index 0000000000..8c5cb7d932 --- /dev/null +++ b/src/scripts/healthsystem/impact_of_actual_vs_funded/scenario_impact_of_actual_vs_funded.py @@ -0,0 +1,117 @@ +"""This Scenario file run the model under different assumptions for HR capabilities expansion in order to estimate the +impact that is achieved under each. + +Run on the batch system using: +``` +tlo batch-submit src/scripts/healthsystem/impact_of_policy/scenario_impact_actual_vs_funded.py +``` + +or locally using: +``` +tlo scenario-run src/scripts/healthsystem/impact_of_policy/scenario_impact_actual_vs_funded.py +``` + +""" +from pathlib import Path +from typing import Dict + +import pandas as pd + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher +from tlo.scenario import BaseScenario + + +class ImpactOfHealthSystemMode(BaseScenario): + def __init__(self): + super().__init__() + self.seed = 0 + self.start_date = Date(2010, 1, 1) + self.end_date = self.start_date + pd.DateOffset(years=31) + self.pop_size = 100_000 + self._scenarios = self._get_scenarios() + self.number_of_draws = len(self._scenarios) + self.runs_per_draw = 10 + + def log_configuration(self): + return { + 'filename': 'effect_of_actual_vs_funded', + 'directory': Path('./outputs'), # <- (specified only for local running) + 'custom_levels': { + '*': logging.WARNING, + 'tlo.methods.demography': logging.INFO, + 'tlo.methods.demography.detail': logging.WARNING, + 'tlo.methods.healthburden': logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + } + } + + def modules(self): + return ( + fullmodel(resourcefilepath=self.resources) + + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + ) + + def draw_parameters(self, draw_number, rng): + if draw_number < self.number_of_draws: + return list(self._scenarios.values())[draw_number] + else: + return + + def _get_scenarios(self) -> Dict[str, Dict]: + """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. + """ + + self.YEAR_OF_CHANGE = 2024 + + return { + + "No growth actual": + mix_scenarios( + self._baseline(), + { + "HealthSystem": { + "use_funded_or_actual_staffing_postSwitch": "actual", + }, + } + ), + + "No growth funded": + mix_scenarios( + self._baseline(), + { + "HealthSystem": { + "use_funded_or_actual_staffing_postSwitch": "funded", + }, + } + ), + + } + + def _baseline(self) -> Dict: + """Return the Dict with values for the parameter changes that define the baseline scenario. """ + return mix_scenarios( + get_parameters_for_status_quo(), + { + "HealthSystem": { + "mode_appt_constraints": 1, # <-- Mode 1 prior to change to preserve calibration + "mode_appt_constraints_postSwitch": 2, # <-- Mode 2 post-change to show effects of HRH + "year_mode_switch": self.YEAR_OF_CHANGE, + "use_funded_or_actual_staffing": 'actual', + "year_use_funded_or_actual_staffing_switch": self.YEAR_OF_CHANGE, + "scale_to_effective_capabilities": False, + "policy_name": "Naive", + "tclose_overwrite": 1, + "tclose_days_offset_overwrite": 7, + "cons_availability": "default", + } + }, + ) + + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/scripts/rti_emulator/analysis_extract_data.py b/src/scripts/rti_emulator/analysis_extract_data.py new file mode 100644 index 0000000000..4649cc6d91 --- /dev/null +++ b/src/scripts/rti_emulator/analysis_extract_data.py @@ -0,0 +1,549 @@ +"""Produce plots to show the health impact (deaths, dalys) each the healthcare system (overall health impact) when +running under different MODES and POLICIES (scenario_test_rti_emulator)""" + +# short tclose -> ideal case +# long tclose -> status quo +import argparse +from pathlib import Path +from typing import Tuple + +import numpy as np +import pandas as pd +from matplotlib import pyplot as plt +from matplotlib.colors import Normalize +from scipy.optimize import curve_fit + +from tlo import Date +from tlo.analysis.utils import extract_results, summarize +from tlo.analysis.life_expectancy import get_life_expectancy_estimates +from tlo.analysis.utils import ( + CAUSE_OF_DEATH_OR_DALY_LABEL_TO_COLOR_MAP, + extract_results, + format_gbd, + get_color_cause_of_death_or_daly_label, + load_pickled_dataframes, + make_age_grp_lookup, + make_age_grp_types, + make_calendar_period_lookup, + make_calendar_period_type, + order_of_cause_of_death_or_daly_label, + plot_clustered_stacked, + summarize, +) +tag = 'emulated' +#tag = 'normal' + +# Range of years considered +min_year = 2010 +max_year = 2019 + +appt_dict = {'Under5OPD': 'OPD', + 'Over5OPD': 'OPD', + 'AntenatalFirst': 'AntenatalTotal', + 'ANCSubsequent': 'AntenatalTotal', + 'NormalDelivery': 'Delivery', + 'CompDelivery': 'Delivery', + 'EstMedCom': 'EstAdult', + 'EstNonCom': 'EstAdult', + 'VCTPositive': 'VCTTests', + 'VCTNegative': 'VCTTests', + 'DentAccidEmerg': 'DentalAll', + 'DentSurg': 'DentalAll', + 'DentU5': 'DentalAll', + 'DentO5': 'DentalAll', + 'MentOPD': 'MentalAll', + 'MentClinic': 'MentalAll' + } + +def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ): + """Produce standard set of plots describing the effect of each TREATMENT_ID. + - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. + - We estimate the draw on healthcare system resources as the FEWER appointments when that treatment does not occur. + """ + + TARGET_PERIOD = (Date(min_year, 1, 1), Date(max_year, 1, 1)) + + # Definitions of general helper functions + make_graph_file_name = lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 + + def target_period() -> str: + """Returns the target period as a string of the form YYYY-YYYY""" + return "-".join(str(t.year) for t in TARGET_PERIOD) + + def get_parameter_names_from_scenario_file() -> Tuple[str]: + """Get the tuple of names of the scenarios from `Scenario` class used to create the results.""" + from scripts.rti_emulator.scenario_test_rti_emulator import ( + GenerateDataChains, + ) + e = GenerateDataChains() + return tuple(e._scenarios.keys()) + + def get_num_deaths(_df): + """Return total number of Deaths (total within the TARGET_PERIOD) + """ + return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)])) + + def get_num_dalys(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + def get_num_dalys_by_cause(_df): + """Return number of DALYs by cause by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + + def set_param_names_as_column_index_level_0(_df): + """Set the columns index (level 0) as the param_names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] + assert len(names_of_cols_level0) == len(_df.columns.levels[0]) + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df + + def find_difference_relative_to_comparison(_ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. + The comparison is `X - COMPARISON`.""" + return _ser \ + .unstack(level=0) \ + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ + .drop(columns=([comparison] if drop_comparison else [])) \ + .stack() + + + def get_counts_of_hsi_by_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + + year_target = 2023 + def get_counts_of_hsi_by_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).dt.year ==year_target, 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + + def get_counts_of_hsi_by_short_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + def get_counts_of_hsi_by_short_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id_by_year(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + def get_annual_num_appts_by_level(results_folder: Path) -> pd.DataFrame: + """Return pd.DataFrame gives the (mean) simulated annual number of appointments of each type at each level.""" + + def get_counts_of_appts(_df): + """Get the mean number of appointments of each type being used each year at each level. + Need to rename appts to match standardized categories from the DHIS2 data.""" + + def unpack_nested_dict_in_series(_raw: pd.Series): + return pd.concat( + { + idx: pd.DataFrame.from_dict(mydict) for idx, mydict in _raw.items() + } + ).unstack().fillna(0.0).astype(int) + + return _df \ + .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code_And_Level'] \ + .pipe(unpack_nested_dict_in_series) \ + .rename(columns=appt_dict, level=1) \ + .rename(columns={'1b': '2'}, level=0) \ + .groupby(level=[0, 1], axis=1).sum() \ + .mean(axis=0) # mean over each year (row) + + return summarize( + extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event', + custom_generate_series=get_counts_of_appts, + do_scaling=True + ), + only_mean=False, + collapse_columns=True, + ).unstack().astype(int) + + def get_annual_num_appts_by_level_with_confidence_interval(results_folder: Path) -> pd.DataFrame: + """Return pd.DataFrame gives the (mean) simulated annual number of appointments of each type at each level, + with 95% confidence interval.""" + #param_names = get_parameter_names_from_scenario_file() + def get_counts_of_appts(_df): + """Get the mean number of appointments of each type being used each year at each level. + Need to rename appts to match standardized categories from the DHIS2 data.""" + + def unpack_nested_dict_in_series(_raw: pd.Series): + return pd.concat( + { + idx: pd.DataFrame.from_dict(mydict) for idx, mydict in _raw.iteritems() + } + ).unstack().fillna(0.0).astype(int) + + return _df \ + .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'Number_By_Appt_Type_Code_And_Level'] \ + .pipe(unpack_nested_dict_in_series) \ + .rename(columns=appt_dict, level=1) \ + .rename(columns={'1b': '2'}, level=0) \ + .groupby(level=[0, 1], axis=1).sum() \ + .mean(axis=0) # mean over each year (row) + + return summarize( + extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event', + custom_generate_series=get_counts_of_appts, + do_scaling=True + ), + only_mean=False, + collapse_columns=True, + ).unstack().astype(int) + + + model = get_annual_num_appts_by_level(results_folder=results_folder) + model.to_csv('ConvertedOutputs/Total_Appt_Footprint_' + tag + '.csv', index=True) + #exit(-1) + # Obtain parameter names for this scenario file + param_names = get_parameter_names_from_scenario_file() + print(param_names) + + # ================================================================================================ + # TIME EVOLUTION OF TOTAL DALYs + # Plot DALYs averted compared to the ``No Policy'' policy + + year_target = 2023 # This global variable will be passed to custom function + def get_num_dalys_by_year(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + this_min_year = 2010 + for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'index_original']) + concatenated_df = concatenated_df.reset_index(level='index_original',drop=True) + dalys_by_year = concatenated_df + print(dalys_by_year) + dalys_by_year_summarise = summarize(dalys_by_year).unstack().astype(int) + + dalys_by_year.to_csv('ConvertedOutputs/Total_DALYs_with_time_' + tag + '.csv', index=True) + dalys_by_year_summarise.to_csv('ConvertedOutputs/Total_DALYs_with_time_summarised_' + tag + '.csv', index=True) + + # ================================================================================================ + # Print population under each scenario + pop_model = extract_results(results_folder, + module="tlo.methods.demography", + key="population", + column="total", + index="date", + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + pop_model.index = pop_model.index.year + pop_model = pop_model[(pop_model.index >= this_min_year) & (pop_model.index <= max_year)] + print(pop_model) + assert dalys_by_year.index.equals(pop_model.index) + assert all(dalys_by_year.columns == pop_model.columns) + pop_model.to_csv('ConvertedOutputs/Population_with_time_' + tag + '.csv', index=True) + + # ================================================================================================ + # DEATHSs BROKEN DOWN BY CAUSES + # %% Quantify the health losses associated with all interventions combined. + + def get_num_deaths_by_cause_label(_df): + """Return total number of Deaths by label (total by age-group within the TARGET_PERIOD) + """ + return _df \ + .loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)] \ + .groupby(_df['label']) \ + .size() + + num_deaths_by_cause_label = summarize( + extract_results( + results_folder, + module='tlo.methods.demography', + key='death', + custom_generate_series=get_num_deaths_by_cause_label, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ) + print("Total number of deaths") + print(num_deaths_by_cause_label) + + year_target = 2023 # This global variable will be passed to custom function + def get_num_deaths_by_year_and_cause(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + newTARGET_PERIOD = (Date(year_target, 1, 1), Date(year_target, 12, 31)) + return _df \ + .loc[pd.to_datetime(_df.date).between(*newTARGET_PERIOD)] \ + .groupby(_df['label']) \ + .size() + #.loc[pd.to_datetime(_df.date).dt.year == year_target] \ + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + this_min_year = 2010 + for year in range(this_min_year, max_year+1): + year_target = year + num_deaths_by_year_and_cause = extract_results( + results_folder, + module='tlo.methods.demography', + key='death', + custom_generate_series=get_num_deaths_by_year_and_cause, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_deaths_by_year_and_cause #summarize(num_dalys_by_year) + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + + df_total = concatenated_df + print(df_total) + print(df_total.groupby('cause').cumsum()) + print(summarize(df_total.groupby('cause').sum())) + df_total.to_csv('ConvertedOutputs/Deaths_by_cause_with_time_' + tag + '.csv', index=True) + df_total_summarise = summarize(df_total).unstack().astype(int) + df_total_summarise.to_csv('ConvertedOutputs/Deaths_by_cause_with_time_summarised_' + tag + '.csv', index=True) + + + # ================================================================================================ + # DALYs BROKEN DOWN BY CAUSES AND YEAR + # DALYs by cause per year + # %% Quantify the health losses associated with all interventions combined. + + year_target = 2023 # This global variable will be passed to custom function + def get_num_dalys_by_year_and_cause(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + this_min_year = 2010 + for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_year_and_cause, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year #summarize(num_dalys_by_year) + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + + df_total = concatenated_df + df_total.to_csv('ConvertedOutputs/DALYS_by_cause_with_time_' + tag + '.csv', index=True) + df_total_summarise = summarize(df_total).unstack().astype(int) + df_total_summarise.to_csv('ConvertedOutputs/DALYS_by_cause_with_time_summarised_' + tag + '.csv', index=True) + + + + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + for year in range(min_year, max_year+1): + year_target = year + + hsi_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_delivered_by_year + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + HSI_ran_by_year = concatenated_df + + del ALL + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + for year in range(min_year, max_year+1): + year_target = year + + hsi_not_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Never_ran_HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_not_delivered_by_year + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + HSI_never_ran_by_year = concatenated_df + + HSI_never_ran_by_year = HSI_never_ran_by_year.fillna(0) #clean_df( + HSI_ran_by_year = HSI_ran_by_year.fillna(0) + HSI_total_by_year = HSI_ran_by_year.add(HSI_never_ran_by_year, fill_value=0) + HSI_ran_by_year.to_csv('ConvertedOutputs/HSIs_ran_by_area_with_time_' + tag + '.csv', index=True) + HSI_never_ran_by_year.to_csv('ConvertedOutputs/HSIs_never_ran_by_area_with_time_' + tag + '.csv', index=True) + print(HSI_ran_by_year) + print(HSI_never_ran_by_year) + print(HSI_total_by_year) + + def get_dalys_by_period_sex_agegrp_label(df): + """Sum the dalys by period, sex, age-group and label""" + _, calperiodlookup = make_calendar_period_lookup() + + df['age_grp'] = df['age_range'].astype(make_age_grp_types()) + df["period"] = df["year"].map(calperiodlookup).astype(make_calendar_period_type()) + df = df.drop(columns=['date', 'age_range', 'year']) + df = df.groupby(by=["period", "sex", "age_grp"]).sum().stack() + df.index = df.index.set_names('label', level=3) + return df + + results = extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_stacked_by_age_and_time", # <-- for DALYS stacked by age and time + custom_generate_series=get_dalys_by_period_sex_agegrp_label, + do_scaling=True + ) + + # divide by five to give the average number of deaths per year within the five year period: + results = results.div(5.0) + results_to_store = summarize((results.loc['2010-2014'] + results.loc['2015-2019'])/2) + results_to_store.to_csv('ConvertedOutputs/DALYs_by_sex_age_' + tag + '.csv', index=True) + + + def get_total_num_dalys_by_wealth_and_label(_df): + """Return the total number of DALYS in the TARGET_PERIOD by wealth and cause label.""" + wealth_cats = {5: '0-19%', 4: '20-39%', 3: '40-59%', 2: '60-79%', 1: '80-100%'} + + return _df \ + .loc[_df['year'].between(*[d.year for d in TARGET_PERIOD])] \ + .drop(columns=['date', 'year']) \ + .assign( + li_wealth=lambda x: x['li_wealth'].map(wealth_cats).astype( + pd.CategoricalDtype(wealth_cats.values(), ordered=True) + ) + ).melt(id_vars=['li_wealth'], var_name='label') \ + .groupby(by=['li_wealth', 'label'])['value'] \ + .sum() + + total_num_dalys_by_wealth_and_label = summarize(extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_by_wealth_stacked_by_age_and_time", + custom_generate_series=get_total_num_dalys_by_wealth_and_label, + do_scaling=True, + ), + + collapse_columns=True, + only_mean=False, + ).unstack() + print(total_num_dalys_by_wealth_and_label) + + total_num_dalys_by_wealth_and_label.to_csv('ConvertedOutputs/DALYs_by_wealth_' + tag + '.csv', index=True) + print(total_num_dalys_by_wealth_and_label) + + +if __name__ == "__main__": + rfp = Path('resources') + + parser = argparse.ArgumentParser( + description="Produce plots to show the impact each set of treatments", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--output-path", + help=( + "Directory to write outputs to. If not specified (set to None) outputs " + "will be written to value of --results-path argument." + ), + type=Path, + default=None, + required=False, + ) + parser.add_argument( + "--resources-path", + help="Directory containing resource files", + type=Path, + default=Path('resources'), + required=False, + ) + parser.add_argument( + "--results-path", + type=Path, + help=( + "Directory containing results from running " + "src/scripts/rti_emulator/scenario_test_rti_emulator " + ), + default=None, + required=False + ) + args = parser.parse_args() + assert args.results_path is not None + results_path = args.results_path + + output_path = results_path if args.output_path is None else args.output_path + + apply( + results_folder=results_path, + output_folder=output_path, + resourcefilepath=args.resources_path + ) diff --git a/src/scripts/rti_emulator/scenario_test_rti_emulator.py b/src/scripts/rti_emulator/scenario_test_rti_emulator.py new file mode 100644 index 0000000000..0ef4ac9a35 --- /dev/null +++ b/src/scripts/rti_emulator/scenario_test_rti_emulator.py @@ -0,0 +1,153 @@ +"""This Scenario file run the model to test the use of the RTI emulator + +Run on the batch system using: +``` +tlo batch-submit + src/scripts/analysis_data_generation/scenario_test_rti_emulator.py +``` + +or locally using: +``` + tlo scenario-run src/scripts/rti_emulator/scenario_test_rti_emulator.py +``` + +""" +from pathlib import Path +from typing import Dict + +import pandas as pd + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher +from tlo.scenario import BaseScenario +from tlo.methods import ( + alri, + cardio_metabolic_disorders, + care_of_women_during_pregnancy, + simplified_births, + contraception, + demography, + depression, + diarrhoea, + enhanced_lifestyle, + epi, + healthburden, + healthseekingbehaviour, + healthsystem, + hiv, + rti, + labour, + malaria, + newborn_outcomes, + postnatal_supervisor, + pregnancy_supervisor, + stunting, + symptommanager, + tb, + wasting, +) + +class GenerateDataChains(BaseScenario): + def __init__(self): + super().__init__() + self.seed = 0 + self.start_date = Date(2010, 1, 1) + self.end_date = self.start_date + pd.DateOffset(years=10) + self.pop_size = 50000 + self._scenarios = self._get_scenarios() + self.number_of_draws = len(self._scenarios) + self.runs_per_draw = 10 + self.generate_event_chains = True + + def log_configuration(self): + return { + 'filename': 'test_rti_emulator', + 'directory': Path('./outputs'), # <- (specified only for local running) + 'custom_levels': { + '*': logging.WARNING, + 'tlo.methods.demography': logging.INFO, + 'tlo.methods.events': logging.INFO, + 'tlo.methods.demography.detail': logging.WARNING, + 'tlo.methods.healthburden': logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + } + } + + def modules(self): + # MODIFY + # Here instead of running full module + return [demography.Demography(resourcefilepath=self.resources), + enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources), + healthburden.HealthBurden(resourcefilepath=self.resources), + symptommanager.SymptomManager(resourcefilepath=self.resources, spurious_symptoms=False), + rti.RTI(resourcefilepath=self.resources), + healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources), + simplified_births.SimplifiedBirths(resourcefilepath=self.resources), + healthsystem.HealthSystem(resourcefilepath=self.resources, + mode_appt_constraints=1, + cons_availability='all')] + + # return ( + # fullmodel(resourcefilepath=self.resources) + # + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + # ) + + def draw_parameters(self, draw_number, rng): + if draw_number < self.number_of_draws: + return list(self._scenarios.values())[draw_number] + else: + return + + # case 1: gfHE = -0.030, factor = 1.01074 + # case 2: gfHE = -0.020, factor = 1.02116 + # case 3: gfHE = -0.015, factor = 1.02637 + # case 4: gfHE = 0.015, factor = 1.05763 + # case 5: gfHE = 0.020, factor = 1.06284 + # case 6: gfHE = 0.030, factor = 1.07326 + + def _get_scenarios(self) -> Dict[str, Dict]: + """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. + """ + + self.YEAR_OF_CHANGE = 2019 + + return { + + # =========== STATUS QUO ============ + "Baseline": + mix_scenarios( + self._baseline(), + { + "HealthSystem": { + "yearly_HR_scaling_mode": "no_scaling", + }, + } + ), + + } + + def _baseline(self) -> Dict: + """Return the Dict with values for the parameter changes that define the baseline scenario. """ + return mix_scenarios( + get_parameters_for_status_quo(), + { + "HealthSystem": { + "mode_appt_constraints": 1, # <-- Mode 1 prior to change to preserve calibration + "mode_appt_constraints_postSwitch": 2, # <-- Mode 2 post-change to show effects of HRH + "year_mode_switch": self.YEAR_OF_CHANGE, + "scale_to_effective_capabilities": True, + "policy_name": "Naive", + "tclose_overwrite": 1, + "tclose_days_offset_overwrite": 7, + "use_funded_or_actual_staffing": "actual", + "cons_availability": "default", + } + }, + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index a3baa7edff..acbba1af39 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -1990,29 +1990,30 @@ def on_end_of_year(self) -> None: # I *think* this is correct, but we may wish to discuss further. # To total appt count - for key, value in self.sim.modules['RTI'].HS_Use_by_RTI.items(): - # Extract the category part (ignoring the level) - _, category = key.split('_', 1) - self._summary_counter._appts[category] += value - - # To appt countbroken down by level - for key, value in self.sim.modules['RTI'].HS_Use_by_RTI.items(): - # Split key into level and category - level, category = key.split('_', 1) - level_number = level.replace("Level", "") - - if level_number in self._summary_counter._appts_by_level: # Ensure the level exists in the second dictionary - self._summary_counter._appts_by_level[level_number][category] += value + if 'RTI' in self.sim.modules: + for key, value in self.sim.modules['RTI'].HS_Use_by_RTI.items(): + # Extract the category part (ignoring the level) + _, category = key.split('_', 1) + self._summary_counter._appts[category] += value - # Reset emulator counter. - self.sim.modules['RTI'].HS_Use_by_RTI = Counter({col: 0 for col in self.sim.modules['RTI'].HS_Use_Type}) - - self._summary_counter.write_to_log_and_reset_counters() - self.consumables.on_end_of_year() - self.bed_days.on_end_of_year() - if self._hsi_event_count_log_period == "year": - self._write_hsi_event_counts_to_log_and_reset() - self._write_never_ran_hsi_event_counts_to_log_and_reset() + # To appt countbroken down by level + for key, value in self.sim.modules['RTI'].HS_Use_by_RTI.items(): + # Split key into level and category + level, category = key.split('_', 1) + level_number = level.replace("Level", "") + + if level_number in self._summary_counter._appts_by_level: # Ensure the level exists in the second dictionary + self._summary_counter._appts_by_level[level_number][category] += value + + # Reset emulator counter. + self.sim.modules['RTI'].HS_Use_by_RTI = Counter({col: 0 for col in self.sim.modules['RTI'].HS_Use_Type}) + + self._summary_counter.write_to_log_and_reset_counters() + self.consumables.on_end_of_year() + self.bed_days.on_end_of_year() + if self._hsi_event_count_log_period == "year": + self._write_hsi_event_counts_to_log_and_reset() + self._write_never_ran_hsi_event_counts_to_log_and_reset() def run_population_level_events(self, _list_of_population_hsi_event_tuples: List[HSIEventQueueItem]) -> None: diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index ad1069d786..68372e3b9d 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -23,15 +23,17 @@ from sdv.single_table import CTGANSynthesizer from sdv.single_table import GaussianCopulaSynthesizer +from sdv.single_table import TVAESynthesizer if TYPE_CHECKING: from tlo.methods.hsi_generic_first_appts import HSIEventScheduler from tlo.population import IndividualProperties # Decide whether to use emulator or not -use_emulator = False +use_emulator = True -emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/RTI_emulator.pkl' +#emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/RTI_emulator.pkl' +emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/RTI_emulator_VAE.pkl' # --------------------------------------------------------------------------------------------------------- # MODULE DEFINITIONS # --------------------------------------------------------------------------------------------------------- @@ -71,9 +73,13 @@ def __init__(self, name=None, resourcefilepath=None): # Initialize the counter with all items set to 0 HS_Use_by_RTI = Counter({col: 0 for col in HS_Use_Type}) - RTI_emulator = GaussianCopulaSynthesizer.load( + RTI_emulator = TVAESynthesizer.load( filepath=emulator_path ) + + #GaussianCopulaSynthesizer.load( + # filepath=emulator_path + #) INJURY_INDICES = range(1, 9) @@ -1568,13 +1574,13 @@ def initialise_simulation(self, sim): haven't then it asks whether they should die away from their injuries """ # Begin modelling road traffic injuries - #sim.schedule_event(RTIPollingEvent(self), sim.date + DateOffset(months=0)) + sim.schedule_event(RTIPollingEvent(self), sim.date + DateOffset(months=0)) - #if use_emulator is False: + if use_emulator is False: # Begin checking whether the persons injuries are healed - # sim.schedule_event(RTI_Recovery_Event(self), sim.date + DateOffset(months=0)) + sim.schedule_event(RTI_Recovery_Event(self), sim.date + DateOffset(months=0)) # Begin checking whether those with untreated injuries die - # sim.schedule_event(RTI_Check_Death_No_Med(self), sim.date + DateOffset(months=0)) + sim.schedule_event(RTI_Check_Death_No_Med(self), sim.date + DateOffset(months=0)) # Begin logging the RTI events # sim.schedule_event(RTI_Logging_Event(self), sim.date + DateOffset(months=1)) From 737e1eb64b94721447a6f6c8b880944586fe2cf8 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 5 Feb 2025 09:40:38 +0000 Subject: [PATCH 11/25] Include option of conditionality --- src/tlo/methods/rti.py | 51 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 68372e3b9d..6814a3ca10 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -24,6 +24,7 @@ from sdv.single_table import CTGANSynthesizer from sdv.single_table import GaussianCopulaSynthesizer from sdv.single_table import TVAESynthesizer +from sdv.sampling import Condition if TYPE_CHECKING: from tlo.methods.hsi_generic_first_appts import HSIEventScheduler @@ -31,9 +32,11 @@ # Decide whether to use emulator or not use_emulator = True - +include_conditionality = True #emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/RTI_emulator.pkl' -emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/RTI_emulator_VAE.pkl' +#emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/RTI_emulator_VAE.pkl' +emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/new_synthesizer.pkl' + # --------------------------------------------------------------------------------------------------------- # MODULE DEFINITIONS # --------------------------------------------------------------------------------------------------------- @@ -63,6 +66,9 @@ def __init__(self, name=None, resourcefilepath=None): 'HealthSystem', } + # ================================================================================ + # EMULATOR PARAMETERS + # Counters tracking use of HealthSystem by RTI module under use of emulator HS_Use_Type = [ 'Level2_AccidentsandEmerg', 'Level2_DiagRadio', 'Level2_EPI', @@ -80,6 +86,14 @@ def __init__(self, name=None, resourcefilepath=None): #GaussianCopulaSynthesizer.load( # filepath=emulator_path #) + if include_conditionality: + Rti_Services = ['Rti_AcutePainManagement', 'Rti_BurnManagement', 'Rti_FractureCast', 'Rti_Imaging', 'Rti_MajorSurgeries', 'Rti_MedicalIntervention', 'Rti_MinorSurgeries'] + + HS_conditions = {} + + # ================================================================================ + + INJURY_INDICES = range(1, 9) @@ -1060,10 +1074,6 @@ def __init__(self, name=None, resourcefilepath=None): 'rt_disability_permanent': Property(Types.REAL, 'disability weight incurred permanently'), 'rt_date_inj': Property(Types.DATE, 'date of latest injury'), 'rt_road_traffic_inc': Property(Types.BOOL, 'involved in a road traffic injury'), - #'rt_inj_severity': Property(Types.CATEGORICAL, - # 'Injury status relating to road traffic injury: none, mild, severe', - # categories=['none', 'mild', 'severe'], - # ), } else: PROPERTIES = { @@ -1586,6 +1596,21 @@ def initialise_simulation(self, sim): # sim.schedule_event(RTI_Logging_Event(self), sim.date + DateOffset(months=1)) + + if use_emulator and include_conditionality: + # If all services are included, set everything to True + if sim.modules['HealthSystem'].service_availability == ['*']: + for i in sim.modules['RTI'].Rti_Services: + sim.modules['RTI'].HS_conditions[i] = True + else: + for i in sim.modules['RTI'].Rti_Services: + if (i + '_*') in sim.modules['HealthSystem'].service_availability: + sim.modules['RTI'].HS_conditions[i] = True + else: + sim.modules['RTI'].HS_conditions[i] = False + print(sim.modules['HealthSystem'].service_availability) + print(sim.modules['RTI'].HS_conditions) + # Look-up consumable item codes self.look_up_consumable_item_codes() @@ -2972,9 +2997,17 @@ def apply(self, population): # First, sample outcomes for individuals which were selected_for_rti. # For now, don't consider properties of individual when sampling outcome. All we care about is the number of samples. - NN_model = self.sim.modules['RTI'].RTI_emulator.sample(len(selected_for_rti)) - #NN_model = self.sample_NN_model(len(selected_for_rti)) - + if include_conditionality: + condition_for_Rti = Condition( + num_rows=len(selected_for_rti), + column_values=self.sim.modules['RTI'].HS_conditions + ) + NN_model = self.sim.modules['RTI'].RTI_emulator.sample_from_conditions( + conditions=[condition_for_Rti], + ) + else: + NN_model = self.sim.modules['RTI'].RTI_emulator.sample(len(selected_for_rti)) + print("I have sampled ==================") # HS USAGE # Get the total number of different types of appts that will be accessed as a result of this polling event and add to rolling count. for column in self.sim.modules['RTI'].HS_Use_Type: From 7bba772688cb4f165416ff7ed3a84818d499a241 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 6 Feb 2025 15:21:04 +0000 Subject: [PATCH 12/25] Increase mortality of other causes of death --- src/tlo/methods/demography.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py index e58f3895f4..c58b61a42f 100644 --- a/src/tlo/methods/demography.py +++ b/src/tlo/methods/demography.py @@ -1,3 +1,4 @@ + """ The core demography module and its associated events. * Sets initial population size @@ -728,7 +729,10 @@ def apply(self, population): mort_risk = self.mort_risk_per_poll.loc[ self.mort_risk_per_poll.fallbackyear == fallbackyear, [ 'age_years', 'sex', 'prob_of_dying_before_next_poll']].copy() - + + # Artificially increase risk for men under 50 by 50 + mort_risk.loc[(mort_risk["sex"] == "M") & (mort_risk["age_years"] < 50), "prob_of_dying_before_next_poll"] *= 50 + # get the population alive = df.loc[df.is_alive & (df.age_years < MAX_AGE), ['sex', 'age_years']].copy() From a37eab568a25791e28c600e6195a550ee4d9176f Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 10 Feb 2025 12:50:28 +0000 Subject: [PATCH 13/25] Fix scenario file --- .../scenario_test_rti_emulator.py | 25 ++++++------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/src/scripts/rti_emulator/scenario_test_rti_emulator.py b/src/scripts/rti_emulator/scenario_test_rti_emulator.py index 0ef4ac9a35..5c4e0a5200 100644 --- a/src/scripts/rti_emulator/scenario_test_rti_emulator.py +++ b/src/scripts/rti_emulator/scenario_test_rti_emulator.py @@ -55,11 +55,10 @@ def __init__(self): self.seed = 0 self.start_date = Date(2010, 1, 1) self.end_date = self.start_date + pd.DateOffset(years=10) - self.pop_size = 50000 + self.pop_size = 50_000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) self.runs_per_draw = 10 - self.generate_event_chains = True def log_configuration(self): return { @@ -110,8 +109,6 @@ def draw_parameters(self, draw_number, rng): def _get_scenarios(self) -> Dict[str, Dict]: """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. """ - - self.YEAR_OF_CHANGE = 2019 return { @@ -119,11 +116,11 @@ def _get_scenarios(self) -> Dict[str, Dict]: "Baseline": mix_scenarios( self._baseline(), - { - "HealthSystem": { - "yearly_HR_scaling_mode": "no_scaling", - }, - } + #{ + # "HealthSystem": { + # "yearly_HR_scaling_mode": "no_scaling", + # }, + #} ), } @@ -134,15 +131,9 @@ def _baseline(self) -> Dict: get_parameters_for_status_quo(), { "HealthSystem": { - "mode_appt_constraints": 1, # <-- Mode 1 prior to change to preserve calibration - "mode_appt_constraints_postSwitch": 2, # <-- Mode 2 post-change to show effects of HRH - "year_mode_switch": self.YEAR_OF_CHANGE, - "scale_to_effective_capabilities": True, - "policy_name": "Naive", - "tclose_overwrite": 1, - "tclose_days_offset_overwrite": 7, + "mode_appt_constraints": 1, "use_funded_or_actual_staffing": "actual", - "cons_availability": "default", + "cons_availability": "all", } }, ) From 2579dd961f5be62bc258b86b304e47813e273e51 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 12 Feb 2025 11:20:52 +0000 Subject: [PATCH 14/25] Check double counting --- src/tlo/methods/demography.py | 2 +- src/tlo/methods/rti.py | 17 ++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py index c58b61a42f..a846fadd91 100644 --- a/src/tlo/methods/demography.py +++ b/src/tlo/methods/demography.py @@ -731,7 +731,7 @@ def apply(self, population): 'age_years', 'sex', 'prob_of_dying_before_next_poll']].copy() # Artificially increase risk for men under 50 by 50 - mort_risk.loc[(mort_risk["sex"] == "M") & (mort_risk["age_years"] < 50), "prob_of_dying_before_next_poll"] *= 50 + mort_risk.loc[(mort_risk["sex"] == "M") & (mort_risk["age_years"] < 50), "prob_of_dying_before_next_poll"] *= 250 # get the population alive = df.loc[df.is_alive & (df.age_years < MAX_AGE), ['sex', 'age_years']].copy() diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 6814a3ca10..d1e615131a 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -32,10 +32,10 @@ # Decide whether to use emulator or not use_emulator = True -include_conditionality = True -#emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/RTI_emulator.pkl' +include_conditionality = False +emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/RTI_emulator.pkl' #emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/RTI_emulator_VAE.pkl' -emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/new_synthesizer.pkl' +#emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/new_synthesizer.pkl' # --------------------------------------------------------------------------------------------------------- # MODULE DEFINITIONS @@ -79,13 +79,13 @@ def __init__(self, name=None, resourcefilepath=None): # Initialize the counter with all items set to 0 HS_Use_by_RTI = Counter({col: 0 for col in HS_Use_Type}) - RTI_emulator = TVAESynthesizer.load( - filepath=emulator_path - ) - - #GaussianCopulaSynthesizer.load( + #RTI_emulator = TVAESynthesizer.load( # filepath=emulator_path #) + + RTI_emulator = GaussianCopulaSynthesizer.load( + filepath=emulator_path + ) if include_conditionality: Rti_Services = ['Rti_AcutePainManagement', 'Rti_BurnManagement', 'Rti_FractureCast', 'Rti_Imaging', 'Rti_MajorSurgeries', 'Rti_MedicalIntervention', 'Rti_MinorSurgeries'] @@ -3007,7 +3007,6 @@ def apply(self, population): ) else: NN_model = self.sim.modules['RTI'].RTI_emulator.sample(len(selected_for_rti)) - print("I have sampled ==================") # HS USAGE # Get the total number of different types of appts that will be accessed as a result of this polling event and add to rolling count. for column in self.sim.modules['RTI'].HS_Use_Type: From 9902292632eb9489c5a5bf5a4ea701f664530f59 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 26 Feb 2025 10:14:00 +0000 Subject: [PATCH 15/25] Turn off services and use conditional NN --- .../rti_emulator/analysis_extract_data.py | 32 ++++++++++--------- .../scenario_test_rti_emulator.py | 1 + src/tlo/methods/demography.py | 2 +- src/tlo/methods/rti.py | 6 ++-- 4 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/scripts/rti_emulator/analysis_extract_data.py b/src/scripts/rti_emulator/analysis_extract_data.py index 4649cc6d91..f003924ee6 100644 --- a/src/scripts/rti_emulator/analysis_extract_data.py +++ b/src/scripts/rti_emulator/analysis_extract_data.py @@ -30,7 +30,8 @@ plot_clustered_stacked, summarize, ) -tag = 'emulated' +tag = 'emulated_with_conditionality_Nothing' +#tag = 'normal_Nothing' #tag = 'normal' # Range of years considered @@ -226,7 +227,7 @@ def unpack_nested_dict_in_series(_raw: pd.Series): model = get_annual_num_appts_by_level(results_folder=results_folder) - model.to_csv('ConvertedOutputs/Total_Appt_Footprint_' + tag + '.csv', index=True) + model.to_csv('ConvertedOutputs/Emulator_Files/Total_Appt_Footprint_' + tag + '.csv', index=True) #exit(-1) # Obtain parameter names for this scenario file param_names = get_parameter_names_from_scenario_file() @@ -268,8 +269,8 @@ def get_num_dalys_by_year(_df): print(dalys_by_year) dalys_by_year_summarise = summarize(dalys_by_year).unstack().astype(int) - dalys_by_year.to_csv('ConvertedOutputs/Total_DALYs_with_time_' + tag + '.csv', index=True) - dalys_by_year_summarise.to_csv('ConvertedOutputs/Total_DALYs_with_time_summarised_' + tag + '.csv', index=True) + dalys_by_year.to_csv('ConvertedOutputs/Emulator_Files/Total_DALYs_with_time_' + tag + '.csv', index=True) + dalys_by_year_summarise.to_csv('ConvertedOutputs/Emulator_Files/Total_DALYs_with_time_summarised_' + tag + '.csv', index=True) # ================================================================================================ # Print population under each scenario @@ -286,7 +287,7 @@ def get_num_dalys_by_year(_df): print(pop_model) assert dalys_by_year.index.equals(pop_model.index) assert all(dalys_by_year.columns == pop_model.columns) - pop_model.to_csv('ConvertedOutputs/Population_with_time_' + tag + '.csv', index=True) + pop_model.to_csv('ConvertedOutputs/Emulator_Files/Population_with_time_' + tag + '.csv', index=True) # ================================================================================================ # DEATHSs BROKEN DOWN BY CAUSES @@ -346,9 +347,9 @@ def get_num_deaths_by_year_and_cause(_df): print(df_total) print(df_total.groupby('cause').cumsum()) print(summarize(df_total.groupby('cause').sum())) - df_total.to_csv('ConvertedOutputs/Deaths_by_cause_with_time_' + tag + '.csv', index=True) + df_total.to_csv('ConvertedOutputs/Emulator_Files/Deaths_by_cause_with_time_' + tag + '.csv', index=True) df_total_summarise = summarize(df_total).unstack().astype(int) - df_total_summarise.to_csv('ConvertedOutputs/Deaths_by_cause_with_time_summarised_' + tag + '.csv', index=True) + df_total_summarise.to_csv('ConvertedOutputs/Emulator_Files/Deaths_by_cause_with_time_summarised_' + tag + '.csv', index=True) # ================================================================================================ @@ -385,12 +386,13 @@ def get_num_dalys_by_year_and_cause(_df): concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) - + df_total = concatenated_df - df_total.to_csv('ConvertedOutputs/DALYS_by_cause_with_time_' + tag + '.csv', index=True) + print(df_total) + df_total.to_csv('ConvertedOutputs/Emulator_Files/DALYS_by_cause_with_time_' + tag + '.csv', index=True) df_total_summarise = summarize(df_total).unstack().astype(int) - df_total_summarise.to_csv('ConvertedOutputs/DALYS_by_cause_with_time_summarised_' + tag + '.csv', index=True) - + print(df_total_summarise) + df_total_summarise.to_csv('ConvertedOutputs/Emulator_Files/DALYS_by_cause_with_time_summarised_' + tag + '.csv', index=True) @@ -439,8 +441,8 @@ def get_num_dalys_by_year_and_cause(_df): HSI_never_ran_by_year = HSI_never_ran_by_year.fillna(0) #clean_df( HSI_ran_by_year = HSI_ran_by_year.fillna(0) HSI_total_by_year = HSI_ran_by_year.add(HSI_never_ran_by_year, fill_value=0) - HSI_ran_by_year.to_csv('ConvertedOutputs/HSIs_ran_by_area_with_time_' + tag + '.csv', index=True) - HSI_never_ran_by_year.to_csv('ConvertedOutputs/HSIs_never_ran_by_area_with_time_' + tag + '.csv', index=True) + HSI_ran_by_year.to_csv('ConvertedOutputs/Emulator_Files/HSIs_ran_by_area_with_time_' + tag + '.csv', index=True) + HSI_never_ran_by_year.to_csv('ConvertedOutputs/Emulator_Files/HSIs_never_ran_by_area_with_time_' + tag + '.csv', index=True) print(HSI_ran_by_year) print(HSI_never_ran_by_year) print(HSI_total_by_year) @@ -467,7 +469,7 @@ def get_dalys_by_period_sex_agegrp_label(df): # divide by five to give the average number of deaths per year within the five year period: results = results.div(5.0) results_to_store = summarize((results.loc['2010-2014'] + results.loc['2015-2019'])/2) - results_to_store.to_csv('ConvertedOutputs/DALYs_by_sex_age_' + tag + '.csv', index=True) + results_to_store.to_csv('ConvertedOutputs/Emulator_Files/DALYs_by_sex_age_' + tag + '.csv', index=True) def get_total_num_dalys_by_wealth_and_label(_df): @@ -498,7 +500,7 @@ def get_total_num_dalys_by_wealth_and_label(_df): ).unstack() print(total_num_dalys_by_wealth_and_label) - total_num_dalys_by_wealth_and_label.to_csv('ConvertedOutputs/DALYs_by_wealth_' + tag + '.csv', index=True) + total_num_dalys_by_wealth_and_label.to_csv('ConvertedOutputs/Emulator_Files/DALYs_by_wealth_' + tag + '.csv', index=True) print(total_num_dalys_by_wealth_and_label) diff --git a/src/scripts/rti_emulator/scenario_test_rti_emulator.py b/src/scripts/rti_emulator/scenario_test_rti_emulator.py index 5c4e0a5200..e939ca0ac2 100644 --- a/src/scripts/rti_emulator/scenario_test_rti_emulator.py +++ b/src/scripts/rti_emulator/scenario_test_rti_emulator.py @@ -134,6 +134,7 @@ def _baseline(self) -> Dict: "mode_appt_constraints": 1, "use_funded_or_actual_staffing": "actual", "cons_availability": "all", + "Service_Availability": [] } }, ) diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py index a846fadd91..cf81c18804 100644 --- a/src/tlo/methods/demography.py +++ b/src/tlo/methods/demography.py @@ -731,7 +731,7 @@ def apply(self, population): 'age_years', 'sex', 'prob_of_dying_before_next_poll']].copy() # Artificially increase risk for men under 50 by 50 - mort_risk.loc[(mort_risk["sex"] == "M") & (mort_risk["age_years"] < 50), "prob_of_dying_before_next_poll"] *= 250 + # mort_risk.loc[(mort_risk["sex"] == "M") & (mort_risk["age_years"] < 50), "prob_of_dying_before_next_poll"] *= 250 # get the population alive = df.loc[df.is_alive & (df.age_years < MAX_AGE), ['sex', 'age_years']].copy() diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index d1e615131a..3c71d774b7 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -32,8 +32,8 @@ # Decide whether to use emulator or not use_emulator = True -include_conditionality = False -emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/RTI_emulator.pkl' +include_conditionality = True +emulator_path = '/Users/mm2908/Desktop/EmuIBM/emulators/latest_synthesizer.pkl' #emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/RTI_emulator_VAE.pkl' #emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/new_synthesizer.pkl' @@ -1608,8 +1608,6 @@ def initialise_simulation(self, sim): sim.modules['RTI'].HS_conditions[i] = True else: sim.modules['RTI'].HS_conditions[i] = False - print(sim.modules['HealthSystem'].service_availability) - print(sim.modules['RTI'].HS_conditions) # Look-up consumable item codes self.look_up_consumable_item_codes() From ac47db74db30087ee3fcfc12c0dd0aa21e9beb48 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 5 Mar 2025 12:12:40 +0000 Subject: [PATCH 16/25] Descriptive modification [skip ci] --- src/tlo/methods/rti.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 3c71d774b7..0ce51d5ef2 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -30,6 +30,8 @@ from tlo.methods.hsi_generic_first_appts import HSIEventScheduler from tlo.population import IndividualProperties +# Include emulator option + # Decide whether to use emulator or not use_emulator = True include_conditionality = True From 1b0e6ae08bb743c9f8717c247529d85e1a8d3ac4 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 31 Jul 2025 08:58:36 +0100 Subject: [PATCH 17/25] Update extract data file --- .../rti_emulator/analysis_extract_data.py | 46 +++++++++++++------ 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/src/scripts/rti_emulator/analysis_extract_data.py b/src/scripts/rti_emulator/analysis_extract_data.py index f003924ee6..2ee463e6d6 100644 --- a/src/scripts/rti_emulator/analysis_extract_data.py +++ b/src/scripts/rti_emulator/analysis_extract_data.py @@ -30,7 +30,19 @@ plot_clustered_stacked, summarize, ) -tag = 'emulated_with_conditionality_Nothing' +tag = 'new_emulated_with_conditionality_All' +#outputs/test_rti_emulator-2025-02-20T092114Z + +#tag = 'new_normal_All' +#outputs/test_rti_emulator-2025-02-10T095938Z + +#tag = 'new_emulated_with_conditionality_All_x250_other' +#outputs/test_rti_emulator-2025-02-19T153503Z + +#tag = 'new_normal_All_x250_other' +#outputs/test_rti_emulator-2025-02-10T171625Z +""" +""" #tag = 'normal_Nothing' #tag = 'normal' @@ -178,17 +190,14 @@ def unpack_nested_dict_in_series(_raw: pd.Series): .groupby(level=[0, 1], axis=1).sum() \ .mean(axis=0) # mean over each year (row) - return summarize( - extract_results( + return extract_results( results_folder, module='tlo.methods.healthsystem.summary', key='HSI_Event', custom_generate_series=get_counts_of_appts, do_scaling=True - ), - only_mean=False, - collapse_columns=True, - ).unstack().astype(int) + ) + def get_annual_num_appts_by_level_with_confidence_interval(results_folder: Path) -> pd.DataFrame: """Return pd.DataFrame gives the (mean) simulated annual number of appointments of each type at each level, @@ -227,7 +236,9 @@ def unpack_nested_dict_in_series(_raw: pd.Series): model = get_annual_num_appts_by_level(results_folder=results_folder) + model_summarised = summarize(model,only_mean=False,collapse_columns=True).unstack().astype(int) model.to_csv('ConvertedOutputs/Emulator_Files/Total_Appt_Footprint_' + tag + '.csv', index=True) + model_summarised.to_csv('ConvertedOutputs/Emulator_Files/Total_Appt_Footprint_summarised_' + tag + '.csv', index=True) #exit(-1) # Obtain parameter names for this scenario file param_names = get_parameter_names_from_scenario_file() @@ -468,8 +479,10 @@ def get_dalys_by_period_sex_agegrp_label(df): # divide by five to give the average number of deaths per year within the five year period: results = results.div(5.0) - results_to_store = summarize((results.loc['2010-2014'] + results.loc['2015-2019'])/2) + results_to_store = (results.loc['2010-2014'] + results.loc['2015-2019'])/2 results_to_store.to_csv('ConvertedOutputs/Emulator_Files/DALYs_by_sex_age_' + tag + '.csv', index=True) + results_to_store = summarize(results_to_store) + results_to_store.to_csv('ConvertedOutputs/Emulator_Files/DALYs_by_sex_age_summarised_' + tag + '.csv', index=True) def get_total_num_dalys_by_wealth_and_label(_df): @@ -487,21 +500,24 @@ def get_total_num_dalys_by_wealth_and_label(_df): .groupby(by=['li_wealth', 'label'])['value'] \ .sum() - total_num_dalys_by_wealth_and_label = summarize(extract_results( + total_num_dalys_by_wealth_and_label = extract_results( results_folder, module="tlo.methods.healthburden", key="dalys_by_wealth_stacked_by_age_and_time", custom_generate_series=get_total_num_dalys_by_wealth_and_label, do_scaling=True, - ), - + ) + + total_num_dalys_by_wealth_and_label.to_csv('ConvertedOutputs/Emulator_Files/DALYs_by_wealth_' + tag + '.csv', index=True) + + total_num_dalys_by_wealth_and_label = summarize(total_num_dalys_by_wealth_and_label, collapse_columns=True, only_mean=False, ).unstack() - print(total_num_dalys_by_wealth_and_label) - - total_num_dalys_by_wealth_and_label.to_csv('ConvertedOutputs/Emulator_Files/DALYs_by_wealth_' + tag + '.csv', index=True) - print(total_num_dalys_by_wealth_and_label) + + total_num_dalys_by_wealth_and_label.to_csv('ConvertedOutputs/Emulator_Files/DALYs_by_wealth_summarised_' + tag + '.csv', index=True) + + if __name__ == "__main__": From fbc100d22bb61d3651458b1b689194f7f6531733 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 18 Aug 2025 14:05:22 +0100 Subject: [PATCH 18/25] Make timing process based, add label to file output and create scenarios --- .../rti_emulator/analysis_extract_data.py | 4 +- src/tlo/methods/rti.py | 72 ++++++++++--------- src/tlo/simulation.py | 19 +++-- 3 files changed, 58 insertions(+), 37 deletions(-) diff --git a/src/scripts/rti_emulator/analysis_extract_data.py b/src/scripts/rti_emulator/analysis_extract_data.py index 2ee463e6d6..16ba64c492 100644 --- a/src/scripts/rti_emulator/analysis_extract_data.py +++ b/src/scripts/rti_emulator/analysis_extract_data.py @@ -30,7 +30,9 @@ plot_clustered_stacked, summarize, ) -tag = 'new_emulated_with_conditionality_All' +tag = 'new_normal_All' + +#tag = 'new_emulated_with_conditionality_All' #outputs/test_rti_emulator-2025-02-20T092114Z #tag = 'new_normal_All' diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 0ce51d5ef2..0407a2fec9 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -30,12 +30,19 @@ from tlo.methods.hsi_generic_first_appts import HSIEventScheduler from tlo.population import IndividualProperties -# Include emulator option - -# Decide whether to use emulator or not -use_emulator = True include_conditionality = True -emulator_path = '/Users/mm2908/Desktop/EmuIBM/emulators/latest_synthesizer.pkl' +emulator_path = '/Users/mm2908/Desktop/EmuIBM/emulators/latest_CTGANSynthesizer_epochs500_dsF_batch_size500_num_k_folds10_Nsubsample10000_InAndOutC_test_k_folding_UniformEncoder_CTGANtest3_repeat_seed42_k_fold0.pkl' + +rti_scenario = "emulator" #standard # "emulator", "noRTIpoll" +if rti_scenario == "emulator": + use_emulator = True +elif rti_scenario == "standard" or rti_scenario == "noRTIpoll": + use_emulator = False +else: + print("ERROR, I don't have scenario") + exit(-1) + +#emulator_path = '/Users/mm2908/Desktop/EmuIBM/emulators/latest_synthesizer.pkl' #emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/RTI_emulator_VAE.pkl' #emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/new_synthesizer.pkl' @@ -70,7 +77,6 @@ def __init__(self, name=None, resourcefilepath=None): # ================================================================================ # EMULATOR PARAMETERS - # Counters tracking use of HealthSystem by RTI module under use of emulator HS_Use_Type = [ 'Level2_AccidentsandEmerg', 'Level2_DiagRadio', 'Level2_EPI', @@ -85,11 +91,14 @@ def __init__(self, name=None, resourcefilepath=None): # filepath=emulator_path #) - RTI_emulator = GaussianCopulaSynthesizer.load( - filepath=emulator_path - ) + #RTI_emulator = GaussianCopulaSynthesizer.load( + # filepath=emulator_path + #) + + RTI_emulator = CTGANSynthesizer.load(filepath=emulator_path) + if include_conditionality: - Rti_Services = ['Rti_AcutePainManagement', 'Rti_BurnManagement', 'Rti_FractureCast', 'Rti_Imaging', 'Rti_MajorSurgeries', 'Rti_MedicalIntervention', 'Rti_MinorSurgeries'] + Rti_Services = ['Rti_AcutePainManagement','Rti_BurnManagement','Rti_FractureCast','Rti_Imaging','Rti_MajorSurgeries','Rti_MedicalIntervention','Rti_MinorSurgeries','Rti_OpenFractureTreatment','Rti_ShockTreatment','Rti_Suture','Rti_TetanusVaccine'] HS_conditions = {} @@ -1586,30 +1595,29 @@ def initialise_simulation(self, sim): haven't then it asks whether they should die away from their injuries """ # Begin modelling road traffic injuries - sim.schedule_event(RTIPollingEvent(self), sim.date + DateOffset(months=0)) - - if use_emulator is False: - # Begin checking whether the persons injuries are healed - sim.schedule_event(RTI_Recovery_Event(self), sim.date + DateOffset(months=0)) - # Begin checking whether those with untreated injuries die - sim.schedule_event(RTI_Check_Death_No_Med(self), sim.date + DateOffset(months=0)) + if rti_scenario == "standard" or rti_scenario == "emulator": + sim.schedule_event(RTIPollingEvent(self), sim.date + DateOffset(months=0)) - # Begin logging the RTI events - # sim.schedule_event(RTI_Logging_Event(self), sim.date + DateOffset(months=1)) - - - - if use_emulator and include_conditionality: - # If all services are included, set everything to True - if sim.modules['HealthSystem'].service_availability == ['*']: - for i in sim.modules['RTI'].Rti_Services: - sim.modules['RTI'].HS_conditions[i] = True - else: - for i in sim.modules['RTI'].Rti_Services: - if (i + '_*') in sim.modules['HealthSystem'].service_availability: + if use_emulator is False: + # Begin checking whether the persons injuries are healed + sim.schedule_event(RTI_Recovery_Event(self), sim.date + DateOffset(months=0)) + # Begin checking whether those with untreated injuries die + sim.schedule_event(RTI_Check_Death_No_Med(self), sim.date + DateOffset(months=0)) + + # Begin logging the RTI events + # sim.schedule_event(RTI_Logging_Event(self), sim.date + DateOffset(months=1)) + + if use_emulator and include_conditionality: + # If all services are included, set everything to True + if sim.modules['HealthSystem'].service_availability == ['*']: + for i in sim.modules['RTI'].Rti_Services: sim.modules['RTI'].HS_conditions[i] = True - else: - sim.modules['RTI'].HS_conditions[i] = False + else: + for i in sim.modules['RTI'].Rti_Services: + if (i + '_*') in sim.modules['HealthSystem'].service_availability: + sim.modules['RTI'].HS_conditions[i] = True + else: + sim.modules['RTI'].HS_conditions[i] = False # Look-up consumable item codes self.look_up_consumable_item_codes() diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 5bca317535..19398cfd9c 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -9,6 +9,7 @@ from collections import OrderedDict from pathlib import Path from typing import TYPE_CHECKING, Optional +from tlo.methods.rti import rti_scenario import numpy as np @@ -349,8 +350,8 @@ def run_simulation_to(self, *, to_date: Date) -> None: :param to_date: Date to simulate up to but not including - must be before or equal to simulation end date specified in call to :py:meth:`initialise`. """ - start_time = time.time() - + start_time = time.process_time() + if not self._initialised: msg = "Simulation must be initialised before calling run_simulation_to" raise SimulationNotInitialisedError(msg) @@ -369,8 +370,18 @@ def run_simulation_to(self, *, to_date: Date) -> None: self.date = to_date if self.show_progress_bar: progress_bar.stop() - end_time = time.time()- start_time - with open('duration_time.txt', 'a') as file: + end_time = time.process_time()- start_time + if rti_scenario == "standard": + filename = 'duration_time_standardRTI_200000ind.txt' + elif rti_scenario == "emulator": + filename = 'duration_time_emulatedRTI_200000ind.txt' + elif rti_scenario == "noRTIpoll": + filename = 'duration_time_noRTIPoll_200000ind.txt' + else: + print("ERROR: I don't know pathname") + exit(-1) + + with open(filename, 'a') as file: # Append text to the file file.write(f"Time taken, {end_time}\n") print("Time taken", end_time) From 94ac5a47dafa042c99e832b62eb925b4065464d6 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 8 Sep 2025 08:46:13 +0100 Subject: [PATCH 19/25] Consider x250 other causes of mortality and track unique file to process outputs and generate data --- .../final_analysis_extract_data.py | 501 ++++++++++++++++++ .../scenario_test_rti_emulator.py | 2 +- src/tlo/methods/demography.py | 4 +- src/tlo/methods/rti.py | 4 +- src/tlo/simulation.py | 6 +- 5 files changed, 509 insertions(+), 8 deletions(-) create mode 100644 src/scripts/rti_emulator/final_analysis_extract_data.py diff --git a/src/scripts/rti_emulator/final_analysis_extract_data.py b/src/scripts/rti_emulator/final_analysis_extract_data.py new file mode 100644 index 0000000000..a3f935edb7 --- /dev/null +++ b/src/scripts/rti_emulator/final_analysis_extract_data.py @@ -0,0 +1,501 @@ +"""Produce plots to show the health impact (deaths, dalys) each the healthcare system (overall health impact) when +running under different MODES and POLICIES (scenario_impact_of_capabilities_expansion_scaling.py)""" + +# short tclose -> ideal case +# long tclose -> status quo +import argparse +from pathlib import Path +from typing import Tuple + +import pandas as pd +from tlo import Date +from tlo.analysis.utils import extract_results +import matplotlib.pyplot as plt + + +outputs = { + 'standard_RTI': {'results_folder' : Path('outputs/test_rti_emulator-2025-08-12T205454Z'), 'data': {}}, + 'emulated_RTI': {'results_folder' : Path('outputs/test_rti_emulator-2025-08-13T080302Z'), 'data' : {}}, + 'no_RTI': {'results_folder' : Path('outputs/test_rti_emulator-2025-08-13T143342Z'), 'data' : {}}, + } + + +def apply(results_folder: Path) -> Tuple: + + tag = results_folder.name + + def get_parameter_names_from_scenario_file() -> Tuple[str]: + """Get the tuple of names of the scenarios from `Scenario` class used to create the results.""" + from scripts.healthsystem.impact_of_const_capabilities_expansion.scenario_impact_of_capabilities_expansion_scaling import ( + ImpactOfHealthSystemMode, + ) + e = ImpactOfHealthSystemMode() + return tuple(e._scenarios.keys()) + + def set_param_names_as_column_index_level_0(_df): + """Set the columns index (level 0) as the param_names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] + assert len(names_of_cols_level0) == len(_df.columns.levels[0]) + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df + + def get_counts_of_death_by_year(df): + df["year"] = df["date"].dt.year + result = df.groupby(by=["year", "label"])["person_id"].count() + return result + + def get_num_dalys_by_year(_df): + drop_cols = ["date", "sex", "age_range"] + result = _df.drop(columns=drop_cols).groupby("year", as_index=True).sum().stack() + result.index.set_names(["year", "label"], inplace=True) + return result + + def rename_index_levels(df): + rename_dict = {} + for level in df.index.names: + if level == 'date': + rename_dict['date'] = 'year' + elif level == 'label': + rename_dict['label'] = 'cause' + + if rename_dict: + df = df.rename_axis(index=rename_dict) + + if "year" in df.index.names: + years = df.index.get_level_values("year") + mask = (years >= 2010) & (years < 2020) + df = df[mask] + return df + + param_names = get_parameter_names_from_scenario_file() + + num_deaths_by_year_and_cause = rename_index_levels(extract_results( + results_folder, + module="tlo.methods.demography", + key="death", + custom_generate_series=get_counts_of_death_by_year, + do_scaling=True + )) + + num_dalys_by_year_and_cause = rename_index_levels(extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_stacked", + custom_generate_series=get_num_dalys_by_year, + do_scaling=True + )) + + num_individuals = extract_results(results_folder, + module="tlo.methods.demography", + key="population", + column="total", + index="date", + do_scaling=True + ) + num_individuals.index = num_individuals.index.year + num_individuals = rename_index_levels(num_individuals) + + cfr = num_deaths_by_year_and_cause.div(num_individuals, level="year") + + data = {'deaths' : num_deaths_by_year_and_cause, 'dalys' : num_dalys_by_year_and_cause, 'pop' : num_individuals, 'cfr' : cfr} + return data + + +# Extract and calculate summary statistics +def compute_summary_stats(df): + df_mean = df.mean(axis=1) + df[('0','mean')] = df_mean + df_lower = df.quantile((1-0.682)/2.0,axis=1) + df_upper = df.quantile((1-0.682)/2.0+0.682,axis=1) + df[('0','mean')] = df_mean + df[('0','lower')] = df_lower + df[('0','upper')] = df_upper + return df + +for key in outputs.keys(): + outputs[key]['data'] = apply(outputs[key]['results_folder']) + for data_type in outputs[key]['data'].keys(): + print("Summary statistics for ", data_type) + outputs[key]['data'][data_type] = compute_summary_stats(outputs[key]['data'][data_type]) + + +def compare_and_plot(outputs, first_scenario, second_scenario, target, factor=None, ylabel=None): + + if factor is None: + df_first = outputs[first_scenario]['data'][target] # formally df + df_second = outputs[second_scenario]['data'][target] # formally df_emulated + else: + df_first = outputs[first_scenario]['data'][target]*factor # formally df + df_second = outputs[second_scenario]['data'][target]*factor # formally df_emulated + + # Define label names + labels = [first_scenario, second_scenario] + + # Iterate over causes + causes = df_first.index.get_level_values('cause').unique() + for cause in causes: + # Extract relevant data for both scenarios + data = {} + for label_name, dataset in zip(labels, [df_first, df_second]): + df_cause = dataset.loc[dataset.index.get_level_values('cause') == cause, ('0', ['lower', 'mean', 'upper'])] + data[label_name] = { + 'lower': df_cause[('0', 'lower')], + 'mean': df_cause[('0', 'mean')], + 'upper': df_cause[('0', 'upper')] + } + print(data) + + years = data[labels[0]]['mean'].index.get_level_values('year') + + # % error + percent_error = 100 * (data[labels[1]]['mean'].values - data[labels[0]]['mean'].values) / data[labels[0]]['mean'].values + + # Create figure with two panels + fig, (ax_main, ax_error) = plt.subplots(2, 1, figsize=(10, 8), + gridspec_kw={'height_ratios': [3, 1]}, sharex=True) + + colors = ['blue', 'orange'] + markers = ['o', 'x'] + for i, label_name in enumerate(labels): + ax_main.plot(years, data[label_name]['mean'], label=label_name, color=colors[i], linestyle='-', marker=markers[i]) + ax_main.fill_between(years, data[label_name]['lower'], data[label_name]['upper'], color=colors[i], alpha=0.2) + + if ylabel is None: + ax_main.set_ylabel(f'{target}') + else: + ax_main.set_ylabel(f'{ylabel}') + ax_main.set_title(f'{target} due to {cause}') + ax_main.legend(loc='lower right') + ax_main.grid(True) + + # % error panel + ax_error.plot(years, percent_error, color='red', marker='o', linestyle='-', label='Mean') + ax_error.axhline(0, color='black', linestyle='--', linewidth=0.8) + ax_error.set_ylabel('% Error') + ax_error.set_xlabel('Year') + ax_error.legend(loc='upper right') + ax_error.grid(True) + + plt.tight_layout() + plt.savefig(f'plots/final_{target}_due_to_{cause}.png') + plt.close() + +compare_and_plot(outputs, 'standard_RTI', 'emulated_RTI', 'deaths', None, 'Deaths') +compare_and_plot(outputs, 'standard_RTI', 'emulated_RTI', 'dalys', None, 'DALYs') +compare_and_plot(outputs, 'standard_RTI', 'emulated_RTI', 'cfr', 1000, 'Crude mortality rate (/year/1000 individuals)') + +exit(-1) + + + + +TARGET_PERIOD = (Date(min_year, 1, 1), Date(max_year, 1, 1)) + +# Definitions of general helper functions +lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 + +def target_period() -> str: + """Returns the target period as a string of the form YYYY-YYYY""" + return "-".join(str(t.year) for t in TARGET_PERIOD) + + + +def get_num_deaths(_df): + """Return total number of Deaths (total within the TARGET_PERIOD) + """ + return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)])) + +def get_num_dalys(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + +def get_num_dalys_by_cause(_df): + """Return number of DALYs by cause by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + +def set_param_names_as_column_index_level_0(_df): + """Set the columns index (level 0) as the param_names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] + assert len(names_of_cols_level0) == len(_df.columns.levels[0]) + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df + +def find_difference_relative_to_comparison(_ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. + The comparison is `X - COMPARISON`.""" + return _ser \ + .unstack(level=0) \ + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ + .drop(columns=([comparison] if drop_comparison else [])) \ + .stack() + + +def get_counts_of_hsi_by_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + +year_target = 2023 +def get_counts_of_hsi_by_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).dt.year ==year_target, 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + +def get_counts_of_hsi_by_short_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + +def get_counts_of_hsi_by_short_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id_by_year(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + +# Obtain parameter names for this scenario file +param_names = get_parameter_names_from_scenario_file() +print(param_names) + + +def get_counts_of_death_by_year(df): + """Aggregate the model outputs into five-year periods for age and time""" + #_, agegrplookup = make_age_grp_lookup() + #_, calperiodlookup = make_calendar_period_lookup() + df["year"] = df["date"].dt.year + #df["age_grp"] = df["age"].map(agegrplookup).astype(make_age_grp_types()) + #df["period"] = df["year"].map(calperiodlookup).astype(make_calendar_period_type()) + return df.groupby(by=["year", "label"])["person_id"].count() + + +deaths_by_year_and_cause = extract_results( + results_folder, + module="tlo.methods.demography", + key="death", + custom_generate_series=get_counts_of_death_by_year, + do_scaling=True +).pipe(set_param_names_as_column_index_level_0) + + +print(deaths_by_year_and_cause) +print(list(deaths_by_year_and_cause.index)) +deaths_by_year_and_cause.to_csv('ConvertedOutputs/Deaths_by_cause_with_time.csv', index=True) +exit(-1) + +# ================================================================================================ +# TIME EVOLUTION OF TOTAL DALYs +# Plot DALYs averted compared to the ``No Policy'' policy + +year_target = 2023 # This global variable will be passed to custom function +def get_num_dalys_in_year(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + +ALL = {} +# Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred +# are consistent across different policies +this_min_year = 2010 +for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_in_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year + print(num_dalys_by_year.columns) +# Concatenate the DataFrames into a single DataFrame +concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) +concatenated_df.index = concatenated_df.index.set_names(['date', 'index_original']) +concatenated_df = concatenated_df.reset_index(level='index_original',drop=True) +dalys_by_year = concatenated_df +print(dalys_by_year) +dalys_by_year.to_csv('ConvertedOutputs/Total_DALYs_with_time.csv', index=True) + +# ================================================================================================ +# Print population under each scenario +pop_model = extract_results(results_folder, + module="tlo.methods.demography", + key="population", + column="total", + index="date", + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + +pop_model.index = pop_model.index.year +pop_model = pop_model[(pop_model.index >= this_min_year) & (pop_model.index <= max_year)] +print(pop_model) +assert dalys_by_year.index.equals(pop_model.index) +assert all(dalys_by_year.columns == pop_model.columns) +pop_model.to_csv('ConvertedOutputs/Population_with_time.csv', index=True) + +# ================================================================================================ +# DALYs BROKEN DOWN BY CAUSES AND YEAR +# DALYs by cause per year +# %% Quantify the health losses associated with all interventions combined. + +year_target = 2023 # This global variable will be passed to custom function +def get_num_dalys_by_year_and_cause(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + +ALL = {} +# Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred +# are consistent across different policies +this_min_year = 2010 +for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_year_and_cause, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year #summarize(num_dalys_by_year) + +# Concatenate the DataFrames into a single DataFrame +concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + +concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + +df_total = concatenated_df +df_total.to_csv('ConvertedOutputs/DALYS_by_cause_with_time.csv', index=True) + +ALL = {} +# Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred +# are consistent across different policies +for year in range(min_year, max_year+1): + year_target = year + + hsi_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_delivered_by_year + +# Concatenate the DataFrames into a single DataFrame +concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) +concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) +HSI_ran_by_year = concatenated_df + +del ALL + +ALL = {} +# Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred +# are consistent across different policies +for year in range(min_year, max_year+1): + year_target = year + + hsi_not_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Never_ran_HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_not_delivered_by_year + +# Concatenate the DataFrames into a single DataFrame +concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) +concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) +HSI_never_ran_by_year = concatenated_df + +HSI_never_ran_by_year = HSI_never_ran_by_year.fillna(0) #clean_df( +HSI_ran_by_year = HSI_ran_by_year.fillna(0) +HSI_total_by_year = HSI_ran_by_year.add(HSI_never_ran_by_year, fill_value=0) +HSI_ran_by_year.to_csv('ConvertedOutputs/HSIs_ran_by_area_with_time.csv', index=True) +HSI_never_ran_by_year.to_csv('ConvertedOutputs/HSIs_never_ran_by_area_with_time.csv', index=True) +print(HSI_ran_by_year) +print(HSI_never_ran_by_year) +print(HSI_total_by_year) +exit(-1) + +""" +if __name__ == "__main__": + rfp = Path('resources') + + parser = argparse.ArgumentParser( + description="Produce plots to show the impact each set of treatments", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--output-path", + help=( + "Directory to write outputs to. If not specified (set to None) outputs " + "will be written to value of --results-path argument." + ), + type=Path, + default=None, + required=False, + ) + parser.add_argument( + "--resources-path", + help="Directory containing resource files", + type=Path, + default=Path('resources'), + required=False, + ) + parser.add_argument( + "--results-path", + type=Path, + help=( + "Directory containing results from running " + "src/scripts/healthsystem/impact_of_const_capabilities_expansion/scenario_impact_of_capabilities_expansion_scaling.py " + ), + default=None, + required=False + ) + args = parser.parse_args() + assert args.results_path is not None + results_path = args.results_path + + output_path = results_path if args.output_path is None else args.output_path + + apply( + results_folder=results_path, + output_folder=output_path, + resourcefilepath=args.resources_path + ) +""" diff --git a/src/scripts/rti_emulator/scenario_test_rti_emulator.py b/src/scripts/rti_emulator/scenario_test_rti_emulator.py index e939ca0ac2..1fd15e8641 100644 --- a/src/scripts/rti_emulator/scenario_test_rti_emulator.py +++ b/src/scripts/rti_emulator/scenario_test_rti_emulator.py @@ -58,7 +58,7 @@ def __init__(self): self.pop_size = 50_000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 10 + self.runs_per_draw = 50 def log_configuration(self): return { diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py index cf81c18804..342851ef46 100644 --- a/src/tlo/methods/demography.py +++ b/src/tlo/methods/demography.py @@ -1,4 +1,4 @@ - +0 """ The core demography module and its associated events. * Sets initial population size @@ -731,7 +731,7 @@ def apply(self, population): 'age_years', 'sex', 'prob_of_dying_before_next_poll']].copy() # Artificially increase risk for men under 50 by 50 - # mort_risk.loc[(mort_risk["sex"] == "M") & (mort_risk["age_years"] < 50), "prob_of_dying_before_next_poll"] *= 250 + mort_risk.loc[(mort_risk["sex"] == "M") & (mort_risk["age_years"] < 50), "prob_of_dying_before_next_poll"] *= 250 # get the population alive = df.loc[df.is_alive & (df.age_years < MAX_AGE), ['sex', 'age_years']].copy() diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 0407a2fec9..0b009e595c 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -31,9 +31,9 @@ from tlo.population import IndividualProperties include_conditionality = True -emulator_path = '/Users/mm2908/Desktop/EmuIBM/emulators/latest_CTGANSynthesizer_epochs500_dsF_batch_size500_num_k_folds10_Nsubsample10000_InAndOutC_test_k_folding_UniformEncoder_CTGANtest3_repeat_seed42_k_fold0.pkl' +emulator_path = '/Users/mm2908/Desktop/EmuIBM/Save_With_WellPerforming/emulators/latest_CTGANSynthesizer_epochs500_dsF_batch_size500_num_k_folds10_Nsubsample10000_InAndOutC_test_k_folding_UniformEncoder_CTGANtest3_repeat_seed42_k_fold0.pkl' -rti_scenario = "emulator" #standard # "emulator", "noRTIpoll" +rti_scenario = "standard" #standard # "emulator", "noRTIpoll" if rti_scenario == "emulator": use_emulator = True elif rti_scenario == "standard" or rti_scenario == "noRTIpoll": diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 19398cfd9c..ed9b747dd3 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -372,11 +372,11 @@ def run_simulation_to(self, *, to_date: Date) -> None: progress_bar.stop() end_time = time.process_time()- start_time if rti_scenario == "standard": - filename = 'duration_time_standardRTI_200000ind.txt' + filename = 'duration_time_standardRTI_x250.txt' elif rti_scenario == "emulator": - filename = 'duration_time_emulatedRTI_200000ind.txt' + filename = 'duration_time_emulatedRTI_x250.txt' elif rti_scenario == "noRTIpoll": - filename = 'duration_time_noRTIPoll_200000ind.txt' + filename = 'duration_time_noRTIPoll_x250.txt' else: print("ERROR: I don't know pathname") exit(-1) From c341cc5624d7d4552fbcde1dc37fca63a699b375 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 8 Sep 2025 09:51:51 +0100 Subject: [PATCH 20/25] Add the dalys by sex and age plot --- .../final_analysis_extract_data.py | 336 +++++++++++++++++- 1 file changed, 328 insertions(+), 8 deletions(-) diff --git a/src/scripts/rti_emulator/final_analysis_extract_data.py b/src/scripts/rti_emulator/final_analysis_extract_data.py index a3f935edb7..ea719ee0dc 100644 --- a/src/scripts/rti_emulator/final_analysis_extract_data.py +++ b/src/scripts/rti_emulator/final_analysis_extract_data.py @@ -9,13 +9,22 @@ import pandas as pd from tlo import Date -from tlo.analysis.utils import extract_results +from tlo.analysis.utils import extract_results, make_calendar_period_type, make_calendar_period_lookup, make_age_grp_lookup, make_age_grp_types import matplotlib.pyplot as plt +# Archive of outputs +# Batch based on /Users/mm2908/Desktop/EmuIBM/Save_With_WellPerforming/emulators/latest_CTGANSynthesizer_epochs500_dsF_batch_size500_num_k_folds10_Nsubsample10000_InAndOutC_test_k_folding_UniformEncoder_CTGANtest3_repeat_seed42_k_fold0.pkl +# 'standard_RTI': 'outputs/test_rti_emulator-2025-08-12T205454Z' +# 'emulated_RTI': 'outputs/test_rti_emulator-2025-08-13T080302Z' +# 'standard_RTI_x250': 'outputs/test_rti_emulator-2025-09-06T055808Z' +# 'emulated_RTI_x250': 'outputs/test_rti_emulator-2025-09-05T085159Z' +# 'no_RTI': 'outputs/test_rti_emulator-2025-08-13T143342Z' outputs = { 'standard_RTI': {'results_folder' : Path('outputs/test_rti_emulator-2025-08-12T205454Z'), 'data': {}}, 'emulated_RTI': {'results_folder' : Path('outputs/test_rti_emulator-2025-08-13T080302Z'), 'data' : {}}, + 'standard_RTI_x250': {'results_folder' : Path('outputs/test_rti_emulator-2025-09-06T055808Z'), 'data': {}}, + 'emulated_RTI_x250': {'results_folder' : Path('outputs/test_rti_emulator-2025-09-05T085159Z'), 'data' : {}}, 'no_RTI': {'results_folder' : Path('outputs/test_rti_emulator-2025-08-13T143342Z'), 'data' : {}}, } @@ -50,6 +59,17 @@ def get_num_dalys_by_year(_df): result = _df.drop(columns=drop_cols).groupby("year", as_index=True).sum().stack() result.index.set_names(["year", "label"], inplace=True) return result + + def get_dalys_by_period_sex_agegrp_label(df): + """Sum the dalys by period, sex, age-group and label""" + _, calperiodlookup = make_calendar_period_lookup() + + df['age_grp'] = df['age_range'].astype(make_age_grp_types()) + df["period"] = df["year"].map(calperiodlookup).astype(make_calendar_period_type()) + df = df.drop(columns=['date', 'age_range', 'year']) + df = df.groupby(by=["period", "sex", "age_grp"]).sum().stack() + df.index = df.index.set_names('label', level=3) + return df def rename_index_levels(df): rename_dict = {} @@ -98,7 +118,20 @@ def rename_index_levels(df): cfr = num_deaths_by_year_and_cause.div(num_individuals, level="year") - data = {'deaths' : num_deaths_by_year_and_cause, 'dalys' : num_dalys_by_year_and_cause, 'pop' : num_individuals, 'cfr' : cfr} + dalys_by_sex_and_age_time = extract_results( + results_folder, + module="tlo.methods.healthburden", + key="dalys_stacked_by_age_and_time", # <-- for DALYS stacked by age and time + custom_generate_series=get_dalys_by_period_sex_agegrp_label, + do_scaling=True + ) + # divide by five to give the average number of deaths per year within the five year period: + dalys_by_sex_and_age_time = dalys_by_sex_and_age_time.div(5.0) + dalys_by_sex_and_age = (dalys_by_sex_and_age_time.loc['2010-2014'] + dalys_by_sex_and_age_time.loc['2015-2019'])/2 + + # Collect all data for this output + data = {'deaths' : num_deaths_by_year_and_cause, 'dalys' : num_dalys_by_year_and_cause, 'pop' : num_individuals, 'cfr' : cfr, 'dalys_by_sex_and_age' : dalys_by_sex_and_age} + return data @@ -113,11 +146,289 @@ def compute_summary_stats(df): df[('0','upper')] = df_upper return df -for key in outputs.keys(): - outputs[key]['data'] = apply(outputs[key]['results_folder']) - for data_type in outputs[key]['data'].keys(): - print("Summary statistics for ", data_type) - outputs[key]['data'][data_type] = compute_summary_stats(outputs[key]['data'][data_type]) + +def compare_and_make_plots_age_sex_distr(outputs, first_scenario, second_scenario, third_scenario, fourth_scenario, target, compare_to_other_x250=True): + # Load from outputs + df_map = { + "normal": outputs[first_scenario]['data'][target], + "emulated": outputs[second_scenario]['data'][target], + "normal_x250": outputs[third_scenario]['data'][target], + "emulated_x250": outputs[fourth_scenario]['data'][target], + } + labels = [first_scenario, second_scenario, third_scenario, fourth_scenario] + + # Focus cause + cause = 'Transport Injuries' + for k, df in df_map.items(): + df_map[k] = compute_summary_stats(df.loc[(slice(None), slice(None), cause)]) + + # Age group mapping + age_grps = df_map["normal"].index.get_level_values('age_grp').unique() + age_grp_mapping = {age: i for i, age in enumerate(age_grps)} + + # Init p-values + pvalues = pd.DataFrame(columns=['p-value ks', 'p-value t', 'p-value w'], + index=pd.MultiIndex.from_tuples([], names=['case', 'sex', 'age_grp'])) + """ + # ---- Helper: compute p-values + def run_tests(case, df1, df2): + for sex in ['F', 'M']: + for age in age_grps: + data1, data2 = df1.loc[(sex, age)].values, df2.loc[(sex, age)].values + p_ks = ks_2samp(data1, data2).pvalue + p_t = stats.ttest_ind(data1, data2).pvalue + p_w = stats.wilcoxon(data1, data2).pvalue + pvalues.loc[(case, sex, age), :] = [p_ks, p_t, p_w] + + run_tests("other_x1", df_map["normal"], df_map["emulated"]) + if compare_to_other_x250: + run_tests("other_x250", df_map["normal_x250"], df_map["emulated_x250"]) + """ + + # ---- Plot + fig, axes = plt.subplots(1, 2, figsize=(14, 7), sharey=True) + for ax in axes: ax.set_ylim(-10, 120000) + + for i, sex in enumerate(['F', 'M']): + x_pos = [age_grp_mapping[a] for a in age_grps] + + def plot_band_and_mean(df, color, marker, label, alpha_fill, alpha_line, ls): + mean = df.loc[(sex, slice(None)), ('0', 'mean')].values + low = df.loc[(sex, slice(None)), ('0', 'lower')].values + up = df.loc[(sex, slice(None)), ('0', 'upper')].values + axes[i].fill_between(x_pos, low, up, color=color, alpha=alpha_fill) + axes[i].plot(x_pos, mean, marker=marker, color=color, alpha=alpha_line, linestyle=ls, label=label) + return mean + + # Plot main (normal vs emulated) + ls, fill, line = ('--', 0.1, 0.4) if compare_to_other_x250 else ('-', 0.3, 1.0) + m_norm = plot_band_and_mean(df_map["normal"], "blue", "o", labels[0] if i==0 else None, fill, line, ls) + m_emul = plot_band_and_mean(df_map["emulated"], "orange", "s", labels[1] if i==0 else None, fill, line, ls) + + # Optional comparison to x250 + if compare_to_other_x250: + m_norm_x250 = plot_band_and_mean(df_map["normal_x250"], "blue", "o", labels[2] if i==0 else None, 0.3, 1.0, '-') + m_emul_x250 = plot_band_and_mean(df_map["emulated_x250"], "orange", "s", labels[3] if i==0 else None, 0.3, 1.0, '-') + + """ + # Annotate KS values + y_base = 42000 if sex=="M" else 30000 + for case, offset in [("other_x1", 7000), ("other_x250", 0)] if compare_to_other_x250 else [("other_x1", 7000)]: + for age in age_grps: + axes[i].text(age_grp_mapping[age], y_base+offset, + f"{pvalues.loc[(case, sex, age), 'p-value ks']:.2f}", + fontsize=8, ha="center", rotation=90) + axes[i].text(age_grp_mapping['0-4'], y_base+offset+3000, + "KS test p-value" + (" (incr. other mortality)" if case=="other_x250" else ""), + fontsize=8, ha="left") + """ + # Formatting + axes[i].set_title(sex) + axes[i].set_xlabel("Age Group") + axes[i].set_ylabel("Averaged Yearly DALYs") + axes[i].set_xticks(x_pos) + axes[i].set_xticklabels(age_grps, rotation=45) + axes[i].legend() + + plt.tight_layout() + fname = "plots/final_DALYs_Breakdown_by_age_grp_DC.png" if compare_to_other_x250 else "plots/final_DALYs_Breakdown_by_age_grp.png" + plt.savefig(fname) + plt.close() + +def compare_old(outputs, first_scenario, second_scenario, third_scenario, fourth_scenario, target): + + # Load output files, which include standard other mortality and other mortality rates increased by x250 + df_af_normal = pd.read_csv('DALYs_by_sex_age_' + first_data_set + '.csv', header=[0, 1], index_col=[0,1,2]) + df_af_emulated = pd.read_csv('DALYs_by_sex_age_' + second_data_set + '.csv', header=[0, 1], index_col=[0,1,2]) + df_af_normal_other_x250 = pd.read_csv('DALYs_by_sex_age_' + first_data_set + '_x250_other.csv', header=[0, 1], index_col=[0,1,2]) + df_af_emulated_other_x250 = pd.read_csv('DALYs_by_sex_age_'+ second_data_set + '_x250_other.csv', header=[0, 1], index_col=[0,1,2]) + + # This is the cause that will be considered in the plots. Could also make loop over this + cause = 'Transport Injuries' + df_normal = df_af_normal.loc[(slice(None), slice(None), cause)] + df_normal_other_x250 = df_af_normal_other_x250.loc[(slice(None), slice(None), cause)] + df_emulated = df_af_emulated.loc[(slice(None), slice(None), cause)] + df_emulated_other_x250 = df_af_emulated_other_x250.loc[(slice(None), slice(None), cause)] + + # Map 'age_grp' to numeric values for easier plotting + age_grp_mapping = {age: i for i, age in enumerate(df_normal.index.get_level_values('age_grp').unique())} + + df_normal = compute_summary_stats(df_normal) + df_emulated = compute_summary_stats(df_emulated) + df_normal_other_x250 = compute_summary_stats(df_normal_other_x250) + df_emulated_other_x250 = compute_summary_stats(df_emulated_other_x250) + + # Store p-values for all x-values + index = pd.MultiIndex.from_tuples([], names=['case', 'sex', 'age_grp']) + columns = ['p-value ks', 'p-value t', 'p-value w'] + pvalues = pd.DataFrame(index=index, columns=columns) + + for sex in ['F','M']: + for age_group in df_normal_other_x250.index.get_level_values('age_grp').unique(): + + # Compute statistical tests for x1 other mortality + data_normal = df_normal_other_x250.loc[(sex,age_group)].values + data_emulat = df_emulated_other_x250.loc[(sex,age_group)].values + + statistic, p_value_ks = ks_2samp(data_normal, data_emulat) + t_stat, p_value_t = stats.ttest_ind(data_normal, data_emulat) + res_w = stats.wilcoxon(data_normal,data_emulat) + p_value_w = res_w.pvalue + pvalues.loc[('other_x250',sex,age_group),:] = [p_value_ks, p_value_t, p_value_w] + + # Compute statistical tests for x250 other mortality + data_normal = df_normal.loc[(sex,age_group)].values + data_emulat = df_emulated.loc[(sex,age_group)].values + + statistic, p_value_ks = ks_2samp(data_normal, data_emulat) + t_stat, p_value_t = stats.ttest_ind(data_normal, data_emulat) + res_w = stats.wilcoxon(data_normal,data_emulat) + p_value_w = res_w.pvalue + pvalues.loc[('other_x1',sex,age_group),:] = [p_value_ks, p_value_t, p_value_w] + + + # Create a figure for the subplots (one for each sex) + fig, axes = plt.subplots(1, 2, figsize=(14, 7), sharey=True) + for ax in axes: + ax.set_ylim(-1000, 55000) + + # Plot for each sex + for sex in ['F','M']: + if sex == 'F': + i = 0 + else: + i = 1 + + # Extract the age groups from the dataframe + age_groups = df_normal.index.get_level_values('age_grp').unique() + + # Create a numeric x_pos based on age groups for easier plotting + x_pos = [age_grp_mapping[age] for age in age_groups] + + # Prepare the values for plotting + normal_means = df_normal.loc[(sex, slice(None)),('0','mean')].values + emulated_means = df_emulated.loc[(sex, slice(None)),('0','mean')].values + normal_lower = df_normal.loc[(sex, slice(None)),('0','lower')].values + normal_upper = df_normal.loc[(sex, slice(None)),('0','upper')].values + emulated_lower = df_emulated.loc[(sex, slice(None)),('0','lower')].values + emulated_upper = df_emulated.loc[(sex, slice(None)),('0','upper')].values + + if compare_to_other_x250: + normal_means_other_x250 = df_normal_other_x250.loc[(sex, slice(None)),('0','mean')].values + emulated_means_other_x250 = df_emulated_other_x250.loc[(sex, slice(None)),('0','mean')].values + normal_lower_other_x250 = df_normal_other_x250.loc[(sex, slice(None)),('0','lower')].values + normal_upper_other_x250 = df_normal_other_x250.loc[(sex, slice(None)),('0','upper')].values + emulated_lower_other_x250 = df_emulated_other_x250.loc[(sex, slice(None)),('0','lower')].values + emulated_upper_other_x250 = df_emulated_other_x250.loc[(sex, slice(None)),('0','upper')].values + fill_main_comparison = 0.1 + line_main_compariosn = 0.4 + main_linestyle = '--' + print(main_linestyle) + else: + fill_main_comparison = 0.3 + line_main_compariosn = 1.0 + main_linestyle = '-' + + # Calculate % error: ((emulated - original) / original) * 100 + percent_error = 100 * (emulated_means - normal_means) / normal_means + if compare_to_other_x250: + percent_error_other_x250 = 100 * (emulated_means_other_x250 - normal_means_other_x250) / normal_means_other_x250 + + # Plot the shaded area for normal data (F or M) + axes[i].fill_between(x_pos, normal_lower, normal_upper, + color='blue', alpha=fill_main_comparison) + if compare_to_other_x250: + axes[i].fill_between(x_pos, normal_lower_other_x250, normal_upper_other_x250, + color='blue', alpha=0.3) + + # Plot the shaded area for emulated data (F or M) + axes[i].fill_between(x_pos, emulated_lower, emulated_upper, + color='orange', alpha=fill_main_comparison) + if compare_to_other_x250: + axes[i].fill_between(x_pos, emulated_lower_other_x250, emulated_upper_other_x250, + color='orange', alpha=0.3) + + # Plot the mean values for normal and emulated data + axes[i].plot(x_pos, normal_means, 'o-', color='blue', alpha=line_main_compariosn, linestyle=main_linestyle, label=labels[first_data_set] if i == 0 else "") # Normal mean + axes[i].plot(x_pos, emulated_means, 's-', color='orange', alpha=line_main_compariosn, linestyle=main_linestyle, label=labels[second_data_set] if i == 0 else "") # Emulated mean + if compare_to_other_x250: + axes[i].plot(x_pos, normal_means_other_x250, 'o-', color='blue', label=f'Original (incr. other mortality)' if i == 0 else "") # Normal mean + axes[i].plot(x_pos, emulated_means_other_x250, 's-', color='orange', label=f'Emulated (incr. other mortality)' if i == 0 else "") # Emulated mean + + # Include KS test p-values in the plot + if sex == 'M': + ks_yloc_other_x250 = 42000 + else: + ks_yloc_other_x250 = 30000 + ks_yloc_other_x1 = ks_yloc_other_x250 + 7000 + + for age_grp in df_normal.index.get_level_values('age_grp').unique(): + ks_value = pvalues.loc[('other_x1',sex, age_grp), 'p-value ks'] + axes[i].text( + x=age_grp_mapping[age_grp], + y=ks_yloc_other_x1, + s=f"{ks_value:.2f}", + fontsize=8, + ha='center', + rotation=90, + #bbox=dict(boxstyle="round,pad=0.1", facecolor="white", alpha=0.7) + ) + + #ks_value = pvalues.loc[(date, cause), 'p-value ks'] + leg_ypos = ks_yloc_other_x1+3000 + axes[i].text( + x=age_grp_mapping['0-4'], + y=leg_ypos, + s=f"KS test p-value", + fontsize=8, + ha='left', + rotation=0, + #bbox=dict(boxstyle="round,pad=0.2", facecolor="white", alpha=0.7) + ) + if compare_to_other_x250: + for age_grp in df_normal.index.get_level_values('age_grp').unique(): + ks_value = pvalues.loc[('other_x250',sex, age_grp), 'p-value ks'] + axes[i].text( + x=age_grp_mapping[age_grp], + y=ks_yloc_other_x250, + s=f"{ks_value:.2f}", + fontsize=8, + ha='center', + rotation=90, + #bbox=dict(boxstyle="round,pad=0.2", facecolor="white", alpha=0.7) + ) + + leg_ypos = ks_yloc_other_x250+3000 + axes[i].text( + x=age_grp_mapping['0-4'], + y=leg_ypos, + s=f"KS test p-value (incr. other mortality)", + fontsize=8, + ha='left', + rotation=0, + #bbox=dict(boxstyle="round,pad=0.2", facecolor="white", alpha=0.7) + ) + + # Set titles and labels + axes[i].set_title(f'{sex}') + axes[i].set_xlabel('Age Group') + axes[i].set_ylabel('Averaged Yearly DALYs') + + # Set x-ticks and labels + axes[i].set_xticks(x_pos) + axes[i].set_xticklabels(age_groups, rotation=45) + + # Add a legend + axes[i].legend() + + # Adjust layout for better spacing + plt.tight_layout() + + # Show the plot + if compare_to_other_x250: + plt.savefig('plots/final_DALYs_Breakdown_by_age_grp_DC.png') + else: + plt.savefig('plots/final_DALYs_Breakdown_by_age_grp.png') def compare_and_plot(outputs, first_scenario, second_scenario, target, factor=None, ylabel=None): @@ -144,7 +455,6 @@ def compare_and_plot(outputs, first_scenario, second_scenario, target, factor=No 'mean': df_cause[('0', 'mean')], 'upper': df_cause[('0', 'upper')] } - print(data) years = data[labels[0]]['mean'].index.get_level_values('year') @@ -181,9 +491,19 @@ def compare_and_plot(outputs, first_scenario, second_scenario, target, factor=No plt.savefig(f'plots/final_{target}_due_to_{cause}.png') plt.close() + +# First collect all data +for key in outputs.keys(): + outputs[key]['data'] = apply(outputs[key]['results_folder']) + for data_type in outputs[key]['data'].keys(): + outputs[key]['data'][data_type] = compute_summary_stats(outputs[key]['data'][data_type]) + + compare_and_plot(outputs, 'standard_RTI', 'emulated_RTI', 'deaths', None, 'Deaths') compare_and_plot(outputs, 'standard_RTI', 'emulated_RTI', 'dalys', None, 'DALYs') compare_and_plot(outputs, 'standard_RTI', 'emulated_RTI', 'cfr', 1000, 'Crude mortality rate (/year/1000 individuals)') +compare_and_make_plots_age_sex_distr(outputs, 'standard_RTI', 'emulated_RTI', 'standard_RTI_x250', 'emulated_RTI_x250', 'dalys_by_sex_and_age', compare_to_other_x250=True) + exit(-1) From c59a159386dd669ecc54492ceed6a66fc087c5db Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 22 Sep 2025 09:17:50 +0100 Subject: [PATCH 21/25] Rescale y axes --- src/scripts/rti_emulator/final_analysis_extract_data.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/scripts/rti_emulator/final_analysis_extract_data.py b/src/scripts/rti_emulator/final_analysis_extract_data.py index ea719ee0dc..ca702699a4 100644 --- a/src/scripts/rti_emulator/final_analysis_extract_data.py +++ b/src/scripts/rti_emulator/final_analysis_extract_data.py @@ -20,6 +20,8 @@ # 'emulated_RTI_x250': 'outputs/test_rti_emulator-2025-09-05T085159Z' # 'no_RTI': 'outputs/test_rti_emulator-2025-08-13T143342Z' + +# Latest CTGAN runs + number of runs per scenario increased from 10 -> 50 outputs = { 'standard_RTI': {'results_folder' : Path('outputs/test_rti_emulator-2025-08-12T205454Z'), 'data': {}}, 'emulated_RTI': {'results_folder' : Path('outputs/test_rti_emulator-2025-08-13T080302Z'), 'data' : {}}, @@ -478,8 +480,11 @@ def compare_and_plot(outputs, first_scenario, second_scenario, target, factor=No ax_main.set_title(f'{target} due to {cause}') ax_main.legend(loc='lower right') ax_main.grid(True) + ax_main.set_ylim(0, 1.2e6) + # % error panel + print("For cause ", cause, " and target ", target, " MEAN PERC ERROR +++++++", percent_error.mean()) ax_error.plot(years, percent_error, color='red', marker='o', linestyle='-', label='Mean') ax_error.axhline(0, color='black', linestyle='--', linewidth=0.8) ax_error.set_ylabel('% Error') From 31218bb24048c39496887cc540e3d7dd6c9a2464 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 20 Nov 2025 09:29:08 +0000 Subject: [PATCH 22/25] Remove mention of RTI from simulation.py --- src/tlo/simulation.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index ed9b747dd3..76aa30428d 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -9,7 +9,6 @@ from collections import OrderedDict from pathlib import Path from typing import TYPE_CHECKING, Optional -from tlo.methods.rti import rti_scenario import numpy as np @@ -350,7 +349,6 @@ def run_simulation_to(self, *, to_date: Date) -> None: :param to_date: Date to simulate up to but not including - must be before or equal to simulation end date specified in call to :py:meth:`initialise`. """ - start_time = time.process_time() if not self._initialised: msg = "Simulation must be initialised before calling run_simulation_to" @@ -370,21 +368,6 @@ def run_simulation_to(self, *, to_date: Date) -> None: self.date = to_date if self.show_progress_bar: progress_bar.stop() - end_time = time.process_time()- start_time - if rti_scenario == "standard": - filename = 'duration_time_standardRTI_x250.txt' - elif rti_scenario == "emulator": - filename = 'duration_time_emulatedRTI_x250.txt' - elif rti_scenario == "noRTIpoll": - filename = 'duration_time_noRTIPoll_x250.txt' - else: - print("ERROR: I don't know pathname") - exit(-1) - - with open(filename, 'a') as file: - # Append text to the file - file.write(f"Time taken, {end_time}\n") - print("Time taken", end_time) def simulate(self, *, end_date: Date) -> None: """Simulate until the given end date From 2fa4970b66097116ffe0bc37c00f51e2d271be78 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 20 Nov 2025 10:01:28 +0000 Subject: [PATCH 23/25] Make use of emulator an RTI parameter --- .../ResourceFile_RTI/parameter_values.csv | 4 +- src/tlo/methods/rti.py | 111 +++++++----------- 2 files changed, 45 insertions(+), 70 deletions(-) diff --git a/resources/ResourceFile_RTI/parameter_values.csv b/resources/ResourceFile_RTI/parameter_values.csv index 1167d1e7b5..8680720a48 100644 --- a/resources/ResourceFile_RTI/parameter_values.csv +++ b/resources/ResourceFile_RTI/parameter_values.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ca079273006733c1c5f232de53d3ecc88bc8d1f6a9b6051dea7da29cb9df3cd -size 5049 +oid sha256:816b79270915a07ed53a8ca03f6fa3d31e314bc886884e198152dff551495eaa +size 5227 diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 8d42dc39f0..f477c53bbf 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -31,22 +31,8 @@ from tlo.methods.hsi_generic_first_appts import HSIEventScheduler from tlo.population import IndividualProperties -include_conditionality = True emulator_path = '/Users/mm2908/Desktop/EmuIBM/Save_With_WellPerforming/emulators/latest_CTGANSynthesizer_epochs500_dsF_batch_size500_num_k_folds10_Nsubsample10000_InAndOutC_test_k_folding_UniformEncoder_CTGANtest3_repeat_seed42_k_fold0.pkl' -rti_scenario = "standard" #standard # "emulator", "noRTIpoll" -if rti_scenario == "emulator": - use_emulator = True -elif rti_scenario == "standard" or rti_scenario == "noRTIpoll": - use_emulator = False -else: - print("ERROR, I don't have scenario") - exit(-1) - -#emulator_path = '/Users/mm2908/Desktop/EmuIBM/emulators/latest_synthesizer.pkl' -#emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/RTI_emulator_VAE.pkl' -#emulator_path = '/Users/mm2908/Desktop/CTGAN/emulators/new_synthesizer.pkl' - # --------------------------------------------------------------------------------------------------------- # MODULE DEFINITIONS # --------------------------------------------------------------------------------------------------------- @@ -86,26 +72,15 @@ def __init__(self, name=None): # Initialize the counter with all items set to 0 HS_Use_by_RTI = Counter({col: 0 for col in HS_Use_Type}) - - #RTI_emulator = TVAESynthesizer.load( - # filepath=emulator_path - #) - - #RTI_emulator = GaussianCopulaSynthesizer.load( - # filepath=emulator_path - #) - - RTI_emulator = CTGANSynthesizer.load(filepath=emulator_path) - - if include_conditionality: - Rti_Services = ['Rti_AcutePainManagement','Rti_BurnManagement','Rti_FractureCast','Rti_Imaging','Rti_MajorSurgeries','Rti_MedicalIntervention','Rti_MinorSurgeries','Rti_OpenFractureTreatment','Rti_ShockTreatment','Rti_Suture','Rti_TetanusVaccine'] + + Rti_Services = ['Rti_AcutePainManagement','Rti_BurnManagement','Rti_FractureCast','Rti_Imaging','Rti_MajorSurgeries','Rti_MedicalIntervention','Rti_MinorSurgeries','Rti_OpenFractureTreatment','Rti_ShockTreatment','Rti_Suture','Rti_TetanusVaccine'] - HS_conditions = {} + HS_conditions = {} + RTI_emulator = None # ================================================================================ - INJURY_INDICES = range(1, 9) INJURY_COLUMNS = [f'rt_injury_{i}' for i in INJURY_INDICES] @@ -1074,12 +1049,15 @@ def __init__(self, name=None): 'maximum_number_of_times_HSI_events_should_run': Parameter( Types.INT, "limit on the number of times an HSI event can run" + ), + 'use_RTI_emulator': Parameter( + Types.BOOL, + "Replace module with RTI emulator, valid if running in mode 1 with actual consumable availability" ) - } # Define the module's parameters - if use_emulator: + if self.parameters['use_RTI_emulator']: PROPERTIES = { 'rt_disability': Property(Types.REAL, 'disability weight for current month'), 'rt_disability_permanent': Property(Types.REAL, 'disability weight incurred permanently'), @@ -1502,6 +1480,9 @@ def read_parameters(self, resourcefilepath: Optional[Path] = None): non_zero_total_daly_change = sum_check_daly_change_df.where(sum_check_daly_change_df > 0).dropna().index # ensure that these injuries are the permanent injuries assert non_zero_total_daly_change.to_list() == permanent_injuries + + if self.parameters['use_RTI_emulator']: + self.RTI_emulator = CTGANSynthesizer.load(filepath= resourcefilepath / 'ResourceFile_RTI/RTI_emulator.pkl') def rti_injury_diagnosis(self, person_id, the_appt_footprint): """ @@ -1540,7 +1521,7 @@ def initialise_population(self, population): healthy.""" df = population.props - if use_emulator: + if self.parameters['use_RTI_emulator']: df.loc[df.is_alive, 'rt_disability'] = 0 # default: no DALY df.loc[df.is_alive, 'rt_disability_permanent'] = 0 # default: no DALY df.loc[df.is_alive, 'rt_road_traffic_inc'] = False @@ -1595,29 +1576,27 @@ def initialise_simulation(self, sim): haven't then it asks whether they should die away from their injuries """ # Begin modelling road traffic injuries - if rti_scenario == "standard" or rti_scenario == "emulator": - sim.schedule_event(RTIPollingEvent(self), sim.date + DateOffset(months=0)) - - if use_emulator is False: - # Begin checking whether the persons injuries are healed - sim.schedule_event(RTI_Recovery_Event(self), sim.date + DateOffset(months=0)) - # Begin checking whether those with untreated injuries die - sim.schedule_event(RTI_Check_Death_No_Med(self), sim.date + DateOffset(months=0)) - + sim.schedule_event(RTIPollingEvent(self), sim.date + DateOffset(months=0)) + + if self.parameters['use_RTI_emulator'] is False: + # Begin checking whether the persons injuries are healed + sim.schedule_event(RTI_Recovery_Event(self), sim.date + DateOffset(months=0)) + # Begin checking whether those with untreated injuries die + sim.schedule_event(RTI_Check_Death_No_Med(self), sim.date + DateOffset(months=0)) # Begin logging the RTI events - # sim.schedule_event(RTI_Logging_Event(self), sim.date + DateOffset(months=1)) - - if use_emulator and include_conditionality: - # If all services are included, set everything to True - if sim.modules['HealthSystem'].service_availability == ['*']: - for i in sim.modules['RTI'].Rti_Services: + sim.schedule_event(RTI_Logging_Event(self), sim.date + DateOffset(months=1)) + + else: + # If all services are included, set everything to True + if sim.modules['HealthSystem'].service_availability == ['*']: + for i in sim.modules['RTI'].Rti_Services: + sim.modules['RTI'].HS_conditions[i] = True + else: + for i in sim.modules['RTI'].Rti_Services: + if (i + '_*') in sim.modules['HealthSystem'].service_availability: sim.modules['RTI'].HS_conditions[i] = True - else: - for i in sim.modules['RTI'].Rti_Services: - if (i + '_*') in sim.modules['HealthSystem'].service_availability: - sim.modules['RTI'].HS_conditions[i] = True - else: - sim.modules['RTI'].HS_conditions[i] = False + else: + sim.modules['RTI'].HS_conditions[i] = False # Look-up consumable item codes self.look_up_consumable_item_codes() @@ -2359,7 +2338,7 @@ def on_birth(self, mother_id, child_id): """ df = self.sim.population.props - if use_emulator: + if self.parameters['use_RTI_emulator']: df.at[child_id, 'rt_disability'] = 0 # default: no DALY df.at[child_id, 'rt_disability_permanent'] = 0 # default: no DALY df.at[child_id, 'rt_road_traffic_inc'] = False @@ -2926,7 +2905,7 @@ def apply(self, population): df = population.props now = self.sim.date - if use_emulator: + if self.parameters['use_RTI_emulator']: rt_current_non_ind = df.index[df.is_alive & ~df.rt_road_traffic_inc] else: # Reset injury properties after death, get an index of people who have died due to RTI, all causes @@ -2997,24 +2976,20 @@ def apply(self, population): df.loc[selected_for_rti, 'rt_road_traffic_inc'] = True # Set the date that people were injured to now df.loc[selected_for_rti, 'rt_date_inj'] = now - - - if use_emulator: + if self.parameters['use_RTI_emulator']: # This is where we want to replace normal course of events for RTI with emulator. # First, sample outcomes for individuals which were selected_for_rti. # For now, don't consider properties of individual when sampling outcome. All we care about is the number of samples. - if include_conditionality: - condition_for_Rti = Condition( - num_rows=len(selected_for_rti), - column_values=self.sim.modules['RTI'].HS_conditions - ) - NN_model = self.sim.modules['RTI'].RTI_emulator.sample_from_conditions( - conditions=[condition_for_Rti], - ) - else: - NN_model = self.sim.modules['RTI'].RTI_emulator.sample(len(selected_for_rti)) + condition_for_Rti = Condition( + num_rows=len(selected_for_rti), + column_values=self.sim.modules['RTI'].HS_conditions + ) + NN_model = self.sim.modules['RTI'].RTI_emulator.sample_from_conditions( + conditions=[condition_for_Rti], + ) + # HS USAGE # Get the total number of different types of appts that will be accessed as a result of this polling event and add to rolling count. for column in self.sim.modules['RTI'].HS_Use_Type: From a09fcbffa7dd96f14a79b87b68e67ec4e513e599 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 20 Nov 2025 10:02:01 +0000 Subject: [PATCH 24/25] Add and track RTI_emulator --- resources/ResourceFile_RTI/RTI_emulator.pkl | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 resources/ResourceFile_RTI/RTI_emulator.pkl diff --git a/resources/ResourceFile_RTI/RTI_emulator.pkl b/resources/ResourceFile_RTI/RTI_emulator.pkl new file mode 100644 index 0000000000..5312c280f5 --- /dev/null +++ b/resources/ResourceFile_RTI/RTI_emulator.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bc5d14a555d635888a7fb760158493c79969281ed574aa871d1296e1bdb4ec9 +size 35333224 From e1cc300b090bc3a6e733e1819be968c0de3eebd0 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 20 Nov 2025 11:07:18 +0000 Subject: [PATCH 25/25] Final changes to healthsystem and rti --- .../ResourceFile_RTI/parameter_values.csv | 4 +- src/tlo/methods/healthsystem.py | 18 ++- src/tlo/methods/rti.py | 152 +++++++++--------- 3 files changed, 91 insertions(+), 83 deletions(-) diff --git a/resources/ResourceFile_RTI/parameter_values.csv b/resources/ResourceFile_RTI/parameter_values.csv index 8680720a48..98b99652b7 100644 --- a/resources/ResourceFile_RTI/parameter_values.csv +++ b/resources/ResourceFile_RTI/parameter_values.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:816b79270915a07ed53a8ca03f6fa3d31e314bc886884e198152dff551495eaa -size 5227 +oid sha256:80e33bccc9ecd960e8c9a260ae0582214fb70ef32d6295dccaa0ca717869e489 +size 5229 diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 8b3d4d6d01..172556016d 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -2019,12 +2019,14 @@ def on_end_of_year(self) -> None: # Add emulated appts to real ones in HS summary counters before logging the latter. # Notes: - # The emulated count is defined in the disease module itself, rather than in the HS, but + # 1) The emulated count is defined in the disease module itself, rather than in the HS, but # is reset at the HS level after the emulated appts have been added to the total count. # I *think* this is correct, but we may wish to discuss further. + # 2) Currently this is RTI-specific: in the future, this would have to be generalised to + # any emulated module # To total appt count - if 'RTI' in self.sim.modules: + if 'RTI' in self.sim.modules and self.sim.modules['RTI'].parameters['use_RTI_emulator']: for key, value in self.sim.modules['RTI'].HS_Use_by_RTI.items(): # Extract the category part (ignoring the level) _, category = key.split('_', 1) @@ -2042,12 +2044,12 @@ def on_end_of_year(self) -> None: # Reset emulator counter. self.sim.modules['RTI'].HS_Use_by_RTI = Counter({col: 0 for col in self.sim.modules['RTI'].HS_Use_Type}) - self._summary_counter.write_to_log_and_reset_counters() - self.consumables.on_end_of_year() - self.bed_days.on_end_of_year() - if self._hsi_event_count_log_period == "year": - self._write_hsi_event_counts_to_log_and_reset() - self._write_never_ran_hsi_event_counts_to_log_and_reset() + self._summary_counter.write_to_log_and_reset_counters() + self.consumables.on_end_of_year() + self.bed_days.on_end_of_year() + if self._hsi_event_count_log_period == "year": + self._write_hsi_event_counts_to_log_and_reset() + self._write_never_ran_hsi_event_counts_to_log_and_reset() # Record equipment usage for the year, for each facility diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index f477c53bbf..745b38d2b7 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -31,8 +31,6 @@ from tlo.methods.hsi_generic_first_appts import HSIEventScheduler from tlo.population import IndividualProperties -emulator_path = '/Users/mm2908/Desktop/EmuIBM/Save_With_WellPerforming/emulators/latest_CTGANSynthesizer_epochs500_dsF_batch_size500_num_k_folds10_Nsubsample10000_InAndOutC_test_k_folding_UniformEncoder_CTGANtest3_repeat_seed42_k_fold0.pkl' - # --------------------------------------------------------------------------------------------------------- # MODULE DEFINITIONS # --------------------------------------------------------------------------------------------------------- @@ -1056,15 +1054,17 @@ def __init__(self, name=None): ) } - # Define the module's parameters - if self.parameters['use_RTI_emulator']: - PROPERTIES = { - 'rt_disability': Property(Types.REAL, 'disability weight for current month'), - 'rt_disability_permanent': Property(Types.REAL, 'disability weight incurred permanently'), - 'rt_date_inj': Property(Types.DATE, 'date of latest injury'), - 'rt_road_traffic_inc': Property(Types.BOOL, 'involved in a road traffic injury'), - } - else: + # Define the module's properties + # ISSUE: When using an emulator, want to limit properties to these four. However parameters have not yet been initialised at this point, so we don't know whether user intends to use emulator or not. + if True: + # This should be: if parameters['use_RTI_emulator']: + # PROPERTIES = { + # 'rt_disability': Property(Types.REAL, 'disability weight for current month'), + # 'rt_disability_permanent': Property(Types.REAL, 'disability weight incurred permanently'), + # 'rt_date_inj': Property(Types.DATE, 'date of latest injury'), + # 'rt_road_traffic_inc': Property(Types.BOOL, 'involved in a road traffic injury'), + # } + #else: PROPERTIES = { 'rt_disability': Property(Types.REAL, 'disability weight for current month'), 'rt_disability_permanent': Property(Types.REAL, 'disability weight incurred permanently'), @@ -1481,6 +1481,7 @@ def read_parameters(self, resourcefilepath: Optional[Path] = None): # ensure that these injuries are the permanent injuries assert non_zero_total_daly_change.to_list() == permanent_injuries + # If using emulator for RTI module, load it here if self.parameters['use_RTI_emulator']: self.RTI_emulator = CTGANSynthesizer.load(filepath= resourcefilepath / 'ResourceFile_RTI/RTI_emulator.pkl') @@ -1521,13 +1522,14 @@ def initialise_population(self, population): healthy.""" df = population.props - if self.parameters['use_RTI_emulator']: - df.loc[df.is_alive, 'rt_disability'] = 0 # default: no DALY - df.loc[df.is_alive, 'rt_disability_permanent'] = 0 # default: no DALY - df.loc[df.is_alive, 'rt_road_traffic_inc'] = False - df.loc[df.is_alive, 'rt_date_inj'] = pd.NaT - - else: + if True: + #if self.parameters['use_RTI_emulator']: + # df.loc[df.is_alive, 'rt_disability'] = 0 # default: no DALY + # df.loc[df.is_alive, 'rt_disability_permanent'] = 0 # default: no DALY + # df.loc[df.is_alive, 'rt_road_traffic_inc'] = False + # df.loc[df.is_alive, 'rt_date_inj'] = pd.NaT + + # else: df.loc[df.is_alive, 'rt_disability'] = 0 # default: no DALY df.loc[df.is_alive, 'rt_disability_permanent'] = 0 # default: no DALY df.loc[df.is_alive, 'rt_road_traffic_inc'] = False @@ -1587,6 +1589,7 @@ def initialise_simulation(self, sim): sim.schedule_event(RTI_Logging_Event(self), sim.date + DateOffset(months=1)) else: + print("======") # If all services are included, set everything to True if sim.modules['HealthSystem'].service_availability == ['*']: for i in sim.modules['RTI'].Rti_Services: @@ -2338,12 +2341,13 @@ def on_birth(self, mother_id, child_id): """ df = self.sim.population.props - if self.parameters['use_RTI_emulator']: - df.at[child_id, 'rt_disability'] = 0 # default: no DALY - df.at[child_id, 'rt_disability_permanent'] = 0 # default: no DALY - df.at[child_id, 'rt_road_traffic_inc'] = False - df.at[child_id, 'rt_date_inj'] = pd.NaT - else: + if True: + #if self.parameters['use_RTI_emulator']: + # df.at[child_id, 'rt_disability'] = 0 # default: no DALY + # df.at[child_id, 'rt_disability_permanent'] = 0 # default: no DALY + # df.at[child_id, 'rt_road_traffic_inc'] = False + # df.at[child_id, 'rt_date_inj'] = pd.NaT + #else: df.at[child_id, 'rt_road_traffic_inc'] = False df.at[child_id, 'rt_inj_severity'] = "none" # default: no one has been injured in a RTI for injury_index in RTI.INJURY_INDICES: @@ -2905,7 +2909,7 @@ def apply(self, population): df = population.props now = self.sim.date - if self.parameters['use_RTI_emulator']: + if self.sim.modules['RTI'].parameters['use_RTI_emulator']: rt_current_non_ind = df.index[df.is_alive & ~df.rt_road_traffic_inc] else: # Reset injury properties after death, get an index of people who have died due to RTI, all causes @@ -2977,58 +2981,60 @@ def apply(self, population): # Set the date that people were injured to now df.loc[selected_for_rti, 'rt_date_inj'] = now - if self.parameters['use_RTI_emulator']: - # This is where we want to replace normal course of events for RTI with emulator. - - # First, sample outcomes for individuals which were selected_for_rti. - # For now, don't consider properties of individual when sampling outcome. All we care about is the number of samples. - condition_for_Rti = Condition( - num_rows=len(selected_for_rti), - column_values=self.sim.modules['RTI'].HS_conditions - ) - NN_model = self.sim.modules['RTI'].RTI_emulator.sample_from_conditions( - conditions=[condition_for_Rti], - ) - - # HS USAGE - # Get the total number of different types of appts that will be accessed as a result of this polling event and add to rolling count. - for column in self.sim.modules['RTI'].HS_Use_Type: - self.sim.modules['RTI'].HS_Use_by_RTI[column] += NN_model[column].sum() # Sum all values in the column - - # Change current properties of the individual and schedule resolution of event. - count = 0 - for person_id in selected_for_rti: - - # These properties are determined by the NN sampling - is_alive_after_RTI = NN_model.loc[count,'is_alive_after_RTI'] + if self.sim.modules['RTI'].parameters['use_RTI_emulator']: - # Why does this require an int wrapper to work with DateOffset? - duration_days = int(NN_model.loc[count,'duration_days']) - - # Individual experiences an immediate death - if is_alive_after_RTI is False and duration_days == 0: - - # For each person selected to experience pre-hospital mortality, schedule an InstantaneosDeath event - self.sim.modules['Demography'].do_death(individual_id=individual_id, cause="RTI_imm_death", - originating_module=self.module) - - # Else individual doesn't immediately die, therefore schedule resolution - else: - # Set disability to what will be the average over duration of the episodef - df.loc[person_id,'rt_disability'] = NN_model.loc[count,'rt_disability_average'] + if len(selected_for_rti)>0: + # This is where we want to replace normal course of events for RTI with emulator. + + # First, sample outcomes for individuals which were selected_for_rti. + # For now, don't consider properties of individual when sampling outcome. All we care about is the number of samples. + condition_for_Rti = Condition( + num_rows=len(selected_for_rti), + column_values=self.sim.modules['RTI'].HS_conditions + ) + NN_model = self.sim.modules['RTI'].RTI_emulator.sample_from_conditions( + conditions=[condition_for_Rti], + ) - # Make sure this person is not 'discoverable' by polling event next month. - #df.loc[person_id,'rt_inj_severity'] = 'mild' # instead of "none", but actually we don't know how severe it is + # HS USAGE + # Get the total number of different types of appts that will be accessed as a result of this polling event and add to rolling count. + for column in self.sim.modules['RTI'].HS_Use_Type: + self.sim.modules['RTI'].HS_Use_by_RTI[column] += NN_model[column].sum() # Sum all values in the column + + # Change current properties of the individual and schedule resolution of event. + count = 0 + for person_id in selected_for_rti: + + # These properties are determined by the NN sampling + is_alive_after_RTI = NN_model.loc[count,'is_alive_after_RTI'] + + # Why does this require an int wrapper to work with DateOffset? + duration_days = int(NN_model.loc[count,'duration_days']) - # Schedule resolution - if is_alive_after_RTI: - # Store permanent disability incurred now to be accessed when Recovery Event is invoked. - df.loc[person_id,'rt_disability_permanent'] = NN_model.loc[count,'rt_disability_permanent'] - self.sim.schedule_event(RTI_NNResolution_Recovery_Event(self.module, person_id), df.loc[person_id, 'rt_date_inj'] + DateOffset(days=duration_days)) + # Individual experiences an immediate death + if is_alive_after_RTI is False and duration_days == 0: + + # For each person selected to experience pre-hospital mortality, schedule an InstantaneosDeath event + self.sim.modules['Demography'].do_death(individual_id=individual_id, cause="RTI_imm_death", + originating_module=self.module) + + # Else individual doesn't immediately die, therefore schedule resolution else: - self.sim.schedule_event(RTI_NNResolution_Death_Event(self.module, person_id), df.loc[person_id, 'rt_date_inj'] + DateOffset(days=duration_days)) - - count += 1 + # Set disability to what will be the average over duration of the episodef + df.loc[person_id,'rt_disability'] = NN_model.loc[count,'rt_disability_average'] + + # Make sure this person is not 'discoverable' by polling event next month. + #df.loc[person_id,'rt_inj_severity'] = 'mild' # instead of "none", but actually we don't know how severe it is + + # Schedule resolution + if is_alive_after_RTI: + # Store permanent disability incurred now to be accessed when Recovery Event is invoked. + df.loc[person_id,'rt_disability_permanent'] = NN_model.loc[count,'rt_disability_permanent'] + self.sim.schedule_event(RTI_NNResolution_Recovery_Event(self.module, person_id), df.loc[person_id, 'rt_date_inj'] + DateOffset(days=duration_days)) + else: + self.sim.schedule_event(RTI_NNResolution_Death_Event(self.module, person_id), df.loc[person_id, 'rt_date_inj'] + DateOffset(days=duration_days)) + + count += 1 else: