From dbff470b51cde44beeefdae3575d52e0c19964bc Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 3 Apr 2024 15:00:09 +0100 Subject: [PATCH 01/54] Investigate analysis of events at sim level --- src/tlo/simulation.py | 9 +++++++++ tests/test_rti.py | 12 ++++++++++++ 2 files changed, 21 insertions(+) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 219b1b8a6f..a641909ed1 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -231,6 +231,15 @@ def simulate(self, *, end_date): if date >= end_date: self.date = end_date break + + #if event.target != self.population: + # print("Event: ", event) + + if event.module == self.modules['RTI']: + print("RTI event ", event) + print(" target ", event.target) + if event.target != self.population: + self.population.props.at[event.tar] self.fire_single_event(event, date) # The simulation has ended. diff --git a/tests/test_rti.py b/tests/test_rti.py index 0e231fb4af..99243b988e 100644 --- a/tests/test_rti.py +++ b/tests/test_rti.py @@ -25,6 +25,17 @@ end_date = Date(2012, 1, 1) popsize = 1000 +@pytest.mark.slow +def test_data_harvesting(seed): + """ + This test runs a simulation with a functioning health system with full service availability and no set + constraints + """ + # create sim object + sim = create_basic_rti_sim(popsize, seed) + # run simulation + sim.simulate(end_date=end_date) + exit(-1) def check_dtypes(simulation): # check types of columns in dataframe, check they are the same, list those that aren't @@ -65,6 +76,7 @@ def test_run(seed): check_dtypes(sim) + @pytest.mark.slow def test_all_injuries_run(seed): """ From 05098f78668a5317667d58cbda882a364a031277 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:26:39 +0200 Subject: [PATCH 02/54] Final data-printing set-up --- src/tlo/methods/demography.py | 7 ++- src/tlo/methods/healthsystem.py | 18 ++++++ src/tlo/methods/hiv.py | 67 ++++++++++++++++++---- src/tlo/methods/tb.py | 99 +++++++++++++++++++++++++-------- src/tlo/simulation.py | 82 ++++++++++++++++++++++++--- 5 files changed, 226 insertions(+), 47 deletions(-) diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py index e58f3895f4..6b2578fd44 100644 --- a/src/tlo/methods/demography.py +++ b/src/tlo/methods/demography.py @@ -315,9 +315,10 @@ def initialise_simulation(self, sim): # Launch the repeating event that will store statistics about the population structure sim.schedule_event(DemographyLoggingEvent(self), sim.date) - # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately - self.other_death_poll = OtherDeathPoll(self) - sim.schedule_event(self.other_death_poll, sim.date) + if sim.generate_data is False: + # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately + self.other_death_poll = OtherDeathPoll(self) + sim.schedule_event(self.other_death_poll, sim.date) # Log the initial population scaling-factor (to the logger of this module and that of `tlo.methods.population`) for _logger in (logger, logger_scale_factor): diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 181c08f5aa..6e251e636c 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -2033,8 +2033,26 @@ def run_individual_level_events_in_mode_0_or_1(self, assert event.facility_info is not None, \ f"Cannot run HSI {event.TREATMENT_ID} without facility_info being defined." + go_ahead = False + if (event.module == self.sim.modules['Tb'] or event.module == self.sim.modules['Hiv']): + go_ahead = True + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'Before' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + # Run the HSI event (allowing it to return an updated appt_footprint) actual_appt_footprint = event.run(squeeze_factor=squeeze_factor) + + if go_ahead: + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'After' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) # Check if the HSI event returned updated appt_footprint if actual_appt_footprint is not None: diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index d6455cc861..8e0d337fc1 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -631,11 +631,12 @@ def initialise_population(self, population): df.loc[df.is_alive, "hv_date_treated"] = pd.NaT df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT - # Launch sub-routines for allocating the right number of people into each category - self.initialise_baseline_prevalence(population) # allocate baseline prevalence + if self.sim.generate_data is False: + # Launch sub-routines for allocating the right number of people into each category + self.initialise_baseline_prevalence(population) # allocate baseline prevalence - self.initialise_baseline_art(population) # allocate baseline art coverage - self.initialise_baseline_tested(population) # allocate baseline testing coverage + self.initialise_baseline_art(population) # allocate baseline art coverage + self.initialise_baseline_tested(population) # allocate baseline testing coverage def initialise_baseline_prevalence(self, population): """ @@ -905,10 +906,16 @@ def initialise_simulation(self, sim): df = sim.population.props p = self.parameters - # 1) Schedule the Main HIV Regular Polling Event - sim.schedule_event( - HivRegularPollingEvent(self), sim.date + DateOffset(days=0) - ) + if self.sim.generate_data: + print("Should be generating data") + sim.schedule_event( + HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0) + ) + else: + # 1) Schedule the Main HIV Regular Polling Event + sim.schedule_event( + HivRegularPollingEvent(self), sim.date + DateOffset(days=0) + ) # 2) Schedule the Logging Event sim.schedule_event(HivLoggingEvent(self), sim.date + DateOffset(years=1)) @@ -1662,6 +1669,37 @@ def do_at_generic_first_appt( # Main Polling Event # --------------------------------------------------------------------------- +class HivPollingEventForDataGeneration(RegularEvent, PopulationScopeEventMixin): + """ The HIV Polling Events for Data Generation + * Ensures that + """ + + def __init__(self, module): + super().__init__( + module, frequency=DateOffset(years=120) + ) # repeats every 12 months, but this can be changed + + def apply(self, population): + + df = population.props + + # Make everyone who is alive and not infected (no-one should be) susceptible + susc_idx = df.loc[ + df.is_alive + & ~df.hv_inf + ].index + + n_susceptible = len(susc_idx) + print("Number of individuals susceptible", n_susceptible) + # Schedule the date of infection for each new infection: + for i in susc_idx: + date_of_infection = self.sim.date + pd.DateOffset( + # Ensure that individual will be infected before end of sim + days=self.module.rng.randint(0, 365*(int(self.sim.end_date.year - self.sim.date.year)+1)) + ) + self.sim.schedule_event( + HivInfectionEvent(self.module, i), date_of_infection + ) class HivRegularPollingEvent(RegularEvent, PopulationScopeEventMixin): """ The HIV Regular Polling Events @@ -1683,6 +1721,7 @@ def apply(self, population): fraction_of_year_between_polls = self.frequency.months / 12 beta = p["beta"] * fraction_of_year_between_polls + # ----------------------------------- HORIZONTAL TRANSMISSION ----------------------------------- def horizontal_transmission(to_sex, from_sex): # Count current number of alive 15-80 year-olds at risk of transmission @@ -1758,6 +1797,7 @@ def horizontal_transmission(to_sex, from_sex): HivInfectionEvent(self.module, idx), date_of_infection ) + # ----------------------------------- SPONTANEOUS TESTING ----------------------------------- def spontaneous_testing(current_year): @@ -1861,11 +1901,12 @@ def vmmc_for_child(): priority=0, ) - # Horizontal transmission: Male --> Female - horizontal_transmission(from_sex="M", to_sex="F") + if self.sim.generate_data is False: + # Horizontal transmission: Male --> Female + horizontal_transmission(from_sex="M", to_sex="F") - # Horizontal transmission: Female --> Male - horizontal_transmission(from_sex="F", to_sex="M") + # Horizontal transmission: Female --> Male + horizontal_transmission(from_sex="F", to_sex="M") # testing # if year later than 2020, set testing rates to those reported in 2020 @@ -1882,6 +1923,8 @@ def vmmc_for_child(): vmmc_for_child() + + # --------------------------------------------------------------------------- # Natural History Events # --------------------------------------------------------------------------- diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 623ee2e483..cd79ae22a5 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -833,28 +833,29 @@ def initialise_population(self, population): df["tb_date_ipt"] = pd.NaT # # ------------------ infection status ------------------ # - # WHO estimates of active TB for 2010 to get infected initial population - # don't need to scale or include treated proportion as no-one on treatment yet - inc_estimates = p["who_incidence_estimates"] - incidence_year = (inc_estimates.loc[ - (inc_estimates.year == self.sim.date.year), "incidence_per_100k" - ].values[0]) / 100_000 - - incidence_year = incidence_year * p["scaling_factor_WHO"] - - self.assign_active_tb( - population, - strain="ds", - incidence=incidence_year) - - self.assign_active_tb( - population, - strain="mdr", - incidence=incidence_year * p['prop_mdr2010']) - - self.send_for_screening_general( - population - ) # send some baseline population for screening + if self.sim.generate_data is False: + # WHO estimates of active TB for 2010 to get infected initial population + # don't need to scale or include treated proportion as no-one on treatment yet + inc_estimates = p["who_incidence_estimates"] + incidence_year = (inc_estimates.loc[ + (inc_estimates.year == self.sim.date.year), "incidence_per_100k" + ].values[0]) / 100_000 + + incidence_year = incidence_year * p["scaling_factor_WHO"] + + self.assign_active_tb( + population, + strain="ds", + incidence=incidence_year) + + self.assign_active_tb( + population, + strain="mdr", + incidence=incidence_year * p['prop_mdr2010']) + + self.send_for_screening_general( + population + ) # send some baseline population for screening def initialise_simulation(self, sim): """ @@ -867,7 +868,11 @@ def initialise_simulation(self, sim): sim.schedule_event(TbActiveEvent(self), sim.date) sim.schedule_event(TbRegularEvents(self), sim.date) sim.schedule_event(TbSelfCureEvent(self), sim.date) - sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) + + if sim.generate_data is False: + sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) + else: + sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0)) # 2) log at the end of the year # Optional: Schedule the scale-up of programs @@ -1366,6 +1371,53 @@ def is_subset(col_for_set, col_for_subset): # # TB infection event # # --------------------------------------------------------------------------- +class TbActiveCasePollGenerateData(RegularEvent, PopulationScopeEventMixin): + """The Tb Regular Poll Event for Data Generation for assigning active infections + * selects everyone to develop an active infection and schedules onset of active tb + sometime during the simulation + """ + + def __init__(self, module): + super().__init__(module, frequency=DateOffset(years=120)) + + def apply(self, population): + + df = population.props + now = self.sim.date + rng = self.module.rng + # Make everyone who is alive and not infected (no-one should be) susceptible + susc_idx = df.loc[ + df.is_alive + & (df.tb_inf != "active") + ].index + + n_susceptible = len(susc_idx) + + middle_index = len(susc_idx) // 2 + + # Will equally split two strains among the population + list_ds = susc_idx[:middle_index] + list_mdr = susc_idx[middle_index:] + + # schedule onset of active tb. This will be equivalent to the "Onset", so it + # doesn't matter how long after we have decided which infection this is. + for person_id in list_ds: + date_progression = now + pd.DateOffset( + # At some point during their lifetime, this person will develop TB + days=self.module.rng.randint(0, 365*(int(self.sim.end_date.year - self.sim.date.year)+1)) + ) + # set date of active tb - properties will be updated at TbActiveEvent poll daily + df.at[person_id, "tb_scheduled_date_active"] = date_progression + df.at[person_id, "tb_strain"] = "ds" + + for person_id in list_mdr: + date_progression = now + pd.DateOffset( + days=rng.randint(0, 365*int(self.sim.end_date.year - self.sim.start_date.year + 1)) + ) + # set date of active tb - properties will be updated at TbActiveEvent poll daily + df.at[person_id, "tb_scheduled_date_active"] = date_progression + df.at[person_id, "tb_strain"] = "mdr" + class TbActiveCasePoll(RegularEvent, PopulationScopeEventMixin): """The Tb Regular Poll Event for assigning active infections @@ -1439,7 +1491,6 @@ def apply(self, population): self.module.update_parameters_for_program_scaleup() - class TbActiveEvent(RegularEvent, PopulationScopeEventMixin): """ * check for those with dates of active tb onset within last time-period diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 5b4e2fff4c..f0c8d6f09f 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -7,7 +7,7 @@ from collections import OrderedDict from pathlib import Path from typing import Dict, Optional, Union - +import pandas as pd import numpy as np from tlo import Date, Population, logging @@ -63,9 +63,11 @@ def __init__(self, *, start_date: Date, seed: int = None, log_config: dict = Non self.date = self.start_date = start_date self.modules = OrderedDict() self.event_queue = EventQueue() + self.generate_data = None self.end_date = None self.output_file = None self.population: Optional[Population] = None + self.event_chains: Optinoal[Population] = None self.show_progress_bar = show_progress_bar self.resourcefilepath = resourcefilepath @@ -209,6 +211,8 @@ def make_initial_population(self, *, n): module.initialise_population(self.population) logger.debug(key='debug', data=f'{module.name}.initialise_population() {time.time() - start1} s') + self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) + end = time.time() logger.info(key='info', data=f'make_initial_population() {end - start} s') @@ -221,7 +225,14 @@ def simulate(self, *, end_date): """ start = time.time() self.end_date = end_date # store the end_date so that others can reference it + self.generate_data = True # for now ensure we're always aiming to print data + + f = open('output.txt', mode='a') + #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) + # Reorder columns to place the new columns at the front + pd.set_option('display.max_columns', None) + print(self.event_chains.columns) for module in self.modules.values(): module.initialise_simulation(self) @@ -250,17 +261,72 @@ def simulate(self, *, end_date): if date >= end_date: self.date = end_date + self.event_chains.to_csv('output.csv', index=False) break - + #if event.target != self.population: # print("Event: ", event) - - if event.module == self.modules['RTI']: - print("RTI event ", event) - print(" target ", event.target) - if event.target != self.population: - self.population.props.at[event.tar] + go_ahead = False + df_before = [] + + # Only print events relevant to modules of interest + # Do not want to compare before/after in births because it may expand the pop dataframe + print_output = True + if print_output: + if (event.module == self.modules['Tb'] or event.module == self.modules['Hiv']) and 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event) and "HealthSystemScheduler" not in str(event): + #if 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event): + go_ahead = True + if event.target != self.population: + row = self.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = date + row['when'] = 'Before' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + else: + df_before = self.population.props.copy() + self.fire_single_event(event, date) + + if print_output: + if go_ahead == True: + if event.target != self.population: + row = self.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = date + row['when'] = 'After' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + else: + df_after = self.population.props.copy() + # if not df_before.columns.equals(df_after.columns): + # print("Number of columns in pop dataframe", len(self.population.props.columns)) + # print("Before", df_before.columns) + # print("After", df_after.columns#) + # exit(-1) + # if not df_before.index.equals(df_after.index): + # print("Number of indices in pop dataframe", len(self.population.props.index)) + # print("----> ", event) + # print("Before", df_before.index#) + # print("After", df_after.index) + # exit(-1) + + change = df_before.compare(df_after) + if ~change.empty: + indices = change.index + new_rows_before = df_before.loc[indices] + new_rows_before['person_ID'] = new_rows_before.index + new_rows_before['event'] = event + new_rows_before['event_date'] = date + new_rows_before['when'] = 'Before' + new_rows_after = df_after.loc[indices] + new_rows_after['person_ID'] = new_rows_after.index + new_rows_after['event'] = event + new_rows_after['event_date'] = date + new_rows_after['when'] = 'After' + + self.event_chains = pd.concat([self.event_chains,new_rows_before], ignore_index=True) + self.event_chains = pd.concat([self.event_chains,new_rows_after], ignore_index=True) # The simulation has ended. if self.show_progress_bar: From 16c071c6220edcc20b539f346625f628e5e8c4c5 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 2 Oct 2024 12:37:38 +0200 Subject: [PATCH 03/54] Print event chains --- src/tlo/methods/demography.py | 2 +- src/tlo/methods/healthsystem.py | 8 ++-- src/tlo/methods/hiv.py | 6 +-- src/tlo/methods/tb.py | 4 +- src/tlo/simulation.py | 47 +++++++++--------- tests/test_data_generation.py | 85 +++++++++++++++++++++++++++++++++ 6 files changed, 117 insertions(+), 35 deletions(-) create mode 100644 tests/test_data_generation.py diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py index 6b2578fd44..4f19af6d55 100644 --- a/src/tlo/methods/demography.py +++ b/src/tlo/methods/demography.py @@ -315,7 +315,7 @@ def initialise_simulation(self, sim): # Launch the repeating event that will store statistics about the population structure sim.schedule_event(DemographyLoggingEvent(self), sim.date) - if sim.generate_data is False: + if sim.generate_event_chains is False: # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately self.other_death_poll = OtherDeathPoll(self) sim.schedule_event(self.other_death_poll, sim.date) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 6e251e636c..203ca10985 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -2033,9 +2033,9 @@ def run_individual_level_events_in_mode_0_or_1(self, assert event.facility_info is not None, \ f"Cannot run HSI {event.TREATMENT_ID} without facility_info being defined." - go_ahead = False - if (event.module == self.sim.modules['Tb'] or event.module == self.sim.modules['Hiv']): - go_ahead = True + print_chains = False + if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True row = self.sim.population.props.iloc[[event.target]] row['person_ID'] = event.target row['event'] = event @@ -2046,7 +2046,7 @@ def run_individual_level_events_in_mode_0_or_1(self, # Run the HSI event (allowing it to return an updated appt_footprint) actual_appt_footprint = event.run(squeeze_factor=squeeze_factor) - if go_ahead: + if print_chains: row = self.sim.population.props.iloc[[event.target]] row['person_ID'] = event.target row['event'] = event diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index 8e0d337fc1..36b1a4bd6e 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -631,7 +631,7 @@ def initialise_population(self, population): df.loc[df.is_alive, "hv_date_treated"] = pd.NaT df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT - if self.sim.generate_data is False: + if self.sim.generate_event_chains is False: # Launch sub-routines for allocating the right number of people into each category self.initialise_baseline_prevalence(population) # allocate baseline prevalence @@ -906,7 +906,7 @@ def initialise_simulation(self, sim): df = sim.population.props p = self.parameters - if self.sim.generate_data: + if self.sim.generate_event_chains: print("Should be generating data") sim.schedule_event( HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0) @@ -1901,7 +1901,7 @@ def vmmc_for_child(): priority=0, ) - if self.sim.generate_data is False: + if self.sim.generate_event_chains is False: # Horizontal transmission: Male --> Female horizontal_transmission(from_sex="M", to_sex="F") diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index cd79ae22a5..57ccd97368 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -833,7 +833,7 @@ def initialise_population(self, population): df["tb_date_ipt"] = pd.NaT # # ------------------ infection status ------------------ # - if self.sim.generate_data is False: + if self.sim.generate_event_chains is False: # WHO estimates of active TB for 2010 to get infected initial population # don't need to scale or include treated proportion as no-one on treatment yet inc_estimates = p["who_incidence_estimates"] @@ -869,7 +869,7 @@ def initialise_simulation(self, sim): sim.schedule_event(TbRegularEvents(self), sim.date) sim.schedule_event(TbSelfCureEvent(self), sim.date) - if sim.generate_data is False: + if sim.generate_event_chains is False: sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) else: sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0)) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index f0c8d6f09f..d055d6e367 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -63,7 +63,9 @@ def __init__(self, *, start_date: Date, seed: int = None, log_config: dict = Non self.date = self.start_date = start_date self.modules = OrderedDict() self.event_queue = EventQueue() - self.generate_data = None + self.generate_event_chains = None + self.generate_event_chains_modules_of_interest = [] + self.generate_event_chains_ignore_events = [] self.end_date = None self.output_file = None self.population: Optional[Population] = None @@ -216,7 +218,7 @@ def make_initial_population(self, *, n): end = time.time() logger.info(key='info', data=f'make_initial_population() {end - start} s') - def simulate(self, *, end_date): + def simulate(self, *, end_date, generate_event_chains = False): """Simulation until the given end date :param end_date: when to stop simulating. Only events strictly before this @@ -225,7 +227,11 @@ def simulate(self, *, end_date): """ start = time.time() self.end_date = end_date # store the end_date so that others can reference it - self.generate_data = True # for now ensure we're always aiming to print data + self.generate_event_chains = generate_event_chains # for now ensure we're always aiming to print data + if self.generate_event_chains: + # For now keep these fixed, eventually they will be input from user + self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']] + self.generate_event_chains_ignore_events = ['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] f = open('output.txt', mode='a') #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) @@ -264,17 +270,13 @@ def simulate(self, *, end_date): self.event_chains.to_csv('output.csv', index=False) break - #if event.target != self.population: - # print("Event: ", event) - go_ahead = False + + print_chains = False df_before = [] - # Only print events relevant to modules of interest - # Do not want to compare before/after in births because it may expand the pop dataframe - print_output = True - if print_output: - if (event.module == self.modules['Tb'] or event.module == self.modules['Hiv']) and 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event) and "HealthSystemScheduler" not in str(event): - #if 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event): + if self.generate_event_chains: + # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore + if (event.module in self.generate_event_chains_modules_of_interest) and all(sub not in str(event) for sub in self.generate_event_chains_ignore_events): go_ahead = True if event.target != self.population: row = self.population.props.iloc[[event.target]] @@ -288,7 +290,7 @@ def simulate(self, *, end_date): self.fire_single_event(event, date) - if print_output: + if go_ahead: if go_ahead == True: if event.target != self.population: row = self.population.props.iloc[[event.target]] @@ -299,18 +301,6 @@ def simulate(self, *, end_date): self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) else: df_after = self.population.props.copy() - # if not df_before.columns.equals(df_after.columns): - # print("Number of columns in pop dataframe", len(self.population.props.columns)) - # print("Before", df_before.columns) - # print("After", df_after.columns#) - # exit(-1) - # if not df_before.index.equals(df_after.index): - # print("Number of indices in pop dataframe", len(self.population.props.index)) - # print("----> ", event) - # print("Before", df_before.index#) - # print("After", df_after.index) - # exit(-1) - change = df_before.compare(df_after) if ~change.empty: indices = change.index @@ -385,6 +375,13 @@ def do_birth(self, mother_id): child_id = self.population.do_birth() for module in self.modules.values(): module.on_birth(mother_id, child_id) + if self.generate_event_chains: + row = self.population.props.iloc[[child_id]] + row['person_ID'] = child_id + row['event'] = 'Birth' + row['event_date'] = self.date + row['when'] = 'After' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) return child_id def find_events_for_person(self, person_id: int): diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py new file mode 100644 index 0000000000..1f6333bbfe --- /dev/null +++ b/tests/test_data_generation.py @@ -0,0 +1,85 @@ +import os +from pathlib import Path + +import pandas as pd +import pytest + +from tlo import Date, Simulation +from tlo.methods import ( + care_of_women_during_pregnancy, + demography, + depression, + enhanced_lifestyle, + epi, + epilepsy, + healthburden, + healthseekingbehaviour, + healthsystem, + hiv, + cardio_metabolic_disorders, + labour, + newborn_outcomes, + postnatal_supervisor, + pregnancy_helper_functions, + pregnancy_supervisor, + depression, + tb, + contraception, +# simplified_births, + symptommanager, +) +from tlo.methods.hsi_generic_first_appts import HSI_GenericEmergencyFirstAppt + +# create simulation parameters +start_date = Date(2010, 1, 1) +end_date = Date(2015, 1, 1) +popsize = 100 + +@pytest.mark.slow +def test_data_harvesting(seed): + """ + This test runs a simulation to print all individual events of specific individuals + """ + + module_of_interest = 'Hiv' + # create sim object + sim = create_basic_sim(popsize, seed) + + dependencies_list = sim.modules[module_of_interest].ADDITIONAL_DEPENDENCIES.union(sim.modules[module_of_interest].INIT_DEPENDENCIES) + + # Check that all dependencies are included + for dep in dependencies_list: + if dep not in sim.modules: + print("WARNING: dependency ", dep, "not included") + exit(-1) + + # run simulation + sim.simulate(end_date=end_date, generate_event_chains = True) + + +def create_basic_sim(population_size, seed): + # create the basic outline of an rti simulation object + sim = Simulation(start_date=start_date, seed=seed) + resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' + sim.register(demography.Demography(resourcefilepath=resourcefilepath), + contraception.Contraception(resourcefilepath=resourcefilepath), + enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), + healthburden.HealthBurden(resourcefilepath=resourcefilepath), + symptommanager.SymptomManager(resourcefilepath=resourcefilepath), + healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']), + healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath), + epi.Epi(resourcefilepath=resourcefilepath), + hiv.Hiv(resourcefilepath=resourcefilepath), + tb.Tb(resourcefilepath=resourcefilepath), + cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath), + depression.Depression(resourcefilepath=resourcefilepath), + newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), + pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), + care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), + labour.Labour(resourcefilepath=resourcefilepath), + postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), + ) + + sim.make_initial_population(n=population_size) + return sim + From ba81487a3fa003e2f10206e435a1d64f170f14e3 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 2 Oct 2024 13:08:50 +0200 Subject: [PATCH 04/54] Add chains in mode 2 too and clean up in simuation --- src/tlo/methods/healthsystem.py | 40 ++++++++++++++++++------ src/tlo/simulation.py | 55 ++++++++++++++++----------------- 2 files changed, 58 insertions(+), 37 deletions(-) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 203ca10985..54cb976b26 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -2034,18 +2034,20 @@ def run_individual_level_events_in_mode_0_or_1(self, f"Cannot run HSI {event.TREATMENT_ID} without facility_info being defined." print_chains = False - if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events): - print_chains = True - row = self.sim.population.props.iloc[[event.target]] - row['person_ID'] = event.target - row['event'] = event - row['event_date'] = self.sim.date - row['when'] = 'Before' - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + if self.sim.generate_event_chains: + if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'Before' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) # Run the HSI event (allowing it to return an updated appt_footprint) actual_appt_footprint = event.run(squeeze_factor=squeeze_factor) + # Print individual info after event if print_chains: row = self.sim.population.props.iloc[[event.target]] row['person_ID'] = event.target @@ -2445,8 +2447,28 @@ def process_events_mode_2(self, hold_over: List[HSIEventQueueItem]) -> None: # Expected appt footprint before running event _appt_footprint_before_running = event.EXPECTED_APPT_FOOTPRINT - # Run event & get actual footprint + + print_chains = False + if self.sim.generate_event_chains: + if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'Before' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + + # Run the HSI event (allowing it to return an updated appt_footprint) actual_appt_footprint = event.run(squeeze_factor=squeeze_factor) + + if print_chains: + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'After' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) # Check if the HSI event returned updated_appt_footprint, and if so adjust original_call if actual_appt_footprint is not None: diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index d055d6e367..616e159453 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -277,7 +277,7 @@ def simulate(self, *, end_date, generate_event_chains = False): if self.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore if (event.module in self.generate_event_chains_modules_of_interest) and all(sub not in str(event) for sub in self.generate_event_chains_ignore_events): - go_ahead = True + print_chains = True if event.target != self.population: row = self.population.props.iloc[[event.target]] row['person_ID'] = event.target @@ -290,33 +290,32 @@ def simulate(self, *, end_date, generate_event_chains = False): self.fire_single_event(event, date) - if go_ahead: - if go_ahead == True: - if event.target != self.population: - row = self.population.props.iloc[[event.target]] - row['person_ID'] = event.target - row['event'] = event - row['event_date'] = date - row['when'] = 'After' - self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) - else: - df_after = self.population.props.copy() - change = df_before.compare(df_after) - if ~change.empty: - indices = change.index - new_rows_before = df_before.loc[indices] - new_rows_before['person_ID'] = new_rows_before.index - new_rows_before['event'] = event - new_rows_before['event_date'] = date - new_rows_before['when'] = 'Before' - new_rows_after = df_after.loc[indices] - new_rows_after['person_ID'] = new_rows_after.index - new_rows_after['event'] = event - new_rows_after['event_date'] = date - new_rows_after['when'] = 'After' - - self.event_chains = pd.concat([self.event_chains,new_rows_before], ignore_index=True) - self.event_chains = pd.concat([self.event_chains,new_rows_after], ignore_index=True) + if print_chains: + if event.target != self.population: + row = self.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = date + row['when'] = 'After' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + else: + df_after = self.population.props.copy() + change = df_before.compare(df_after) + if ~change.empty: + indices = change.index + new_rows_before = df_before.loc[indices] + new_rows_before['person_ID'] = new_rows_before.index + new_rows_before['event'] = event + new_rows_before['event_date'] = date + new_rows_before['when'] = 'Before' + new_rows_after = df_after.loc[indices] + new_rows_after['person_ID'] = new_rows_after.index + new_rows_after['event'] = event + new_rows_after['event_date'] = date + new_rows_after['when'] = 'After' + + self.event_chains = pd.concat([self.event_chains,new_rows_before], ignore_index=True) + self.event_chains = pd.concat([self.event_chains,new_rows_after], ignore_index=True) # The simulation has ended. if self.show_progress_bar: From b1c907c12bfa54621983415b560381d1737afc9a Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 7 Oct 2024 09:36:06 +0200 Subject: [PATCH 05/54] Fix issue with tests by ensuring standard Polling and infection is maintained is generate_event_chains is None --- src/tlo/methods/hiv.py | 6 +++--- src/tlo/methods/hsi_event.py | 14 ++++++++------ src/tlo/methods/tb.py | 10 ++++++---- src/tlo/simulation.py | 4 +++- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index 36b1a4bd6e..391cf587a8 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -631,7 +631,7 @@ def initialise_population(self, population): df.loc[df.is_alive, "hv_date_treated"] = pd.NaT df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT - if self.sim.generate_event_chains is False: + if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None or self.sim.generate_event_chains_overwrite_epi is False: # Launch sub-routines for allocating the right number of people into each category self.initialise_baseline_prevalence(population) # allocate baseline prevalence @@ -906,7 +906,7 @@ def initialise_simulation(self, sim): df = sim.population.props p = self.parameters - if self.sim.generate_event_chains: + if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi: print("Should be generating data") sim.schedule_event( HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0) @@ -1901,7 +1901,7 @@ def vmmc_for_child(): priority=0, ) - if self.sim.generate_event_chains is False: + if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None or self.sim.generate_event_chains_overwrite_epi is False: # Horizontal transmission: Male --> Female horizontal_transmission(from_sex="M", to_sex="F") diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 470794bcdd..785f27b7a6 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -193,10 +193,12 @@ def run(self, squeeze_factor): print_chains = False df_before = [] - + if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and + if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True if self.target != self.sim.population: row = self.sim.population.props.iloc[[self.target]] @@ -204,7 +206,7 @@ def run(self, squeeze_factor): row['event'] = self row['event_date'] = self.sim.date row['when'] = 'Before' - self.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_before = self.sim.population.props.copy() @@ -219,7 +221,7 @@ def run(self, squeeze_factor): row['event'] = self row['event_date'] = self.sim.date row['when'] = 'After' - self.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_after = self.sim.population.props.copy() change = df_before.compare(df_after) @@ -236,8 +238,8 @@ def run(self, squeeze_factor): new_rows_after['event_date'] = self.sim.date new_rows_after['when'] = 'After' - self.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) - self.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) return updated_appt_footprint def get_consumables( diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 57ccd97368..4c170944d2 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -832,8 +832,9 @@ def initialise_population(self, population): df["tb_on_ipt"] = False df["tb_date_ipt"] = pd.NaT + # # ------------------ infection status ------------------ # - if self.sim.generate_event_chains is False: + if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None: # WHO estimates of active TB for 2010 to get infected initial population # don't need to scale or include treated proportion as no-one on treatment yet inc_estimates = p["who_incidence_estimates"] @@ -869,10 +870,11 @@ def initialise_simulation(self, sim): sim.schedule_event(TbRegularEvents(self), sim.date) sim.schedule_event(TbSelfCureEvent(self), sim.date) - if sim.generate_event_chains is False: - sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) - else: + if sim.generate_event_chains is True and sim.generate_event_chains_overwrite_epi is True: sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0)) + else: + sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) + # 2) log at the end of the year # Optional: Schedule the scale-up of programs diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 794bfef98e..4aff23c9d7 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -105,6 +105,7 @@ def __init__( self.modules = OrderedDict() self.event_queue = EventQueue() self.generate_event_chains = None + self.generate_event_chains_overwrite_epi = None self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] self.end_date = None @@ -298,10 +299,11 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None: self.end_date = end_date # store the end_date so that others can reference it self.generate_event_chains = generate_event_chains # for now ensure we're always aiming to print data + self.generate_event_chains_overwrite_epi = False if self.generate_event_chains: # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']] - self.generate_event_chains_ignore_events = ['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] + self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) From cfb4264f0133fccbc0a82a6c9d3f51479d19038f Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 7 Oct 2024 15:51:37 +0200 Subject: [PATCH 06/54] Switch iloc for loc --- src/tlo/events.py | 5 ++--- src/tlo/methods/hsi_event.py | 4 ++-- src/tlo/simulation.py | 9 ++++++--- tests/test_data_generation.py | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 78b828091d..a50832a58d 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -74,7 +74,7 @@ def run(self): if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): print_chains = True if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target row['event'] = self row['event_date'] = self.sim.date @@ -83,13 +83,12 @@ def run(self): else: df_before = self.sim.population.props.copy() - self.apply(self.target) self.post_apply_hook() if print_chains: if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target row['event'] = self row['event_date'] = self.sim.date diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 785f27b7a6..cffeb32992 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -201,7 +201,7 @@ def run(self, squeeze_factor): print_chains = True if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target row['event'] = self row['event_date'] = self.sim.date @@ -216,7 +216,7 @@ def run(self, squeeze_factor): if print_chains: if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target row['event'] = self row['event_date'] = self.sim.date diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 4aff23c9d7..42a2a288d3 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -298,14 +298,17 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None: self.date = self.start_date self.end_date = end_date # store the end_date so that others can reference it - self.generate_event_chains = generate_event_chains # for now ensure we're always aiming to print data - self.generate_event_chains_overwrite_epi = False + self.generate_event_chains = generate_event_chains if self.generate_event_chains: + # Eventually this can be made an option + self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']] self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] + else: + # If not using to print chains, cannot ignore epi + self.generate_event_chains_overwrite_epi = False - #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) # Reorder columns to place the new columns at the front pd.set_option('display.max_columns', None) diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index 1f6333bbfe..8dd92513f9 100644 --- a/tests/test_data_generation.py +++ b/tests/test_data_generation.py @@ -32,7 +32,7 @@ # create simulation parameters start_date = Date(2010, 1, 1) -end_date = Date(2015, 1, 1) +end_date = Date(2014, 1, 1) popsize = 100 @pytest.mark.slow From e0327de6b6f850ac871a2308271f6863333f173e Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 7 Oct 2024 15:55:57 +0200 Subject: [PATCH 07/54] Change syntax of if statement --- src/tlo/events.py | 2 +- src/tlo/methods/hsi_event.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index a50832a58d..2eef87ba3f 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -71,7 +71,7 @@ def run(self): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)): print_chains = True if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index cffeb32992..805c9584fb 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -196,9 +196,7 @@ def run(self, squeeze_factor): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and - if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - + if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)): print_chains = True if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] From fceee02e68722e29314c3d9efe35983709a78deb Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 9 Oct 2024 09:27:54 +0100 Subject: [PATCH 08/54] Change syntax of if statement and print string of event --- src/tlo/events.py | 6 +++--- src/tlo/methods/hsi_event.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 2eef87ba3f..2a7871c2c8 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -71,12 +71,12 @@ def run(self): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)): + if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): print_chains = True if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target - row['event'] = self + row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) @@ -90,7 +90,7 @@ def run(self): if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target - row['event'] = self + row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 805c9584fb..ea9066bc8b 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -196,12 +196,12 @@ def run(self, squeeze_factor): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)): + if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): print_chains = True if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target - row['event'] = self + row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) @@ -216,7 +216,7 @@ def run(self, squeeze_factor): if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target - row['event'] = self + row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) From eaeae626a4b37c024db38abf82bdb7c2e723ffe2 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:45:41 +0100 Subject: [PATCH 09/54] Focus on rti and print footprint --- src/tlo/events.py | 16 +++++++++++++--- src/tlo/methods/hsi_event.py | 36 ++++++++++++++++------------------- src/tlo/methods/rti.py | 8 ++++++-- src/tlo/simulation.py | 6 +++--- tests/test_data_generation.py | 31 ++++++++++++++++-------------- 5 files changed, 55 insertions(+), 42 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 2a7871c2c8..76e1b9a117 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -71,14 +71,19 @@ def run(self): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True if self.target != self.sim.population: - row = self.sim.population.props.loc[[self.target]] + row = self.sim.population.props.iloc[[self.target]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' + row['appt_footprint'] = 'N/A' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_before = self.sim.population.props.copy() @@ -88,11 +93,12 @@ def run(self): if print_chains: if self.target != self.sim.population: - row = self.sim.population.props.loc[[self.target]] + row = self.sim.population.props.iloc[[self.target]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' + row['appt_footprint'] = 'N/A' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_after = self.sim.population.props.copy() @@ -104,11 +110,15 @@ def run(self): new_rows_before['event'] = self new_rows_before['event_date'] = self.sim.date new_rows_before['when'] = 'Before' + new_rows_before['appt_footprint'] = 'N/A' + new_rows_after = df_after.loc[indices] new_rows_after['person_ID'] = new_rows_after.index new_rows_after['event'] = self new_rows_after['event_date'] = self.sim.date new_rows_after['when'] = 'After' + new_rows_after['appt_footprint'] = 'N/A' + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index ea9066bc8b..f8e8738543 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -196,14 +196,19 @@ def run(self, squeeze_factor): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): +# if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True if self.target != self.sim.population: - row = self.sim.population.props.loc[[self.target]] + row = self.sim.population.props.iloc[[self.target]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' + row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_before = self.sim.population.props.copy() @@ -212,32 +217,23 @@ def run(self, squeeze_factor): self.post_apply_hook() self._run_after_hsi_event() + footprint = self.EXPECTED_APPT_FOOTPRINT + if updated_appt_footprint is not None: + footprint = updated_appt_footprint + if print_chains: if self.target != self.sim.population: - row = self.sim.population.props.loc[[self.target]] + row = self.sim.population.props.iloc[[self.target]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' + row['appt_footprint'] = str(footprint) self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: - df_after = self.sim.population.props.copy() - change = df_before.compare(df_after) - if ~change.empty: - indices = change.index - new_rows_before = df_before.loc[indices] - new_rows_before['person_ID'] = new_rows_before.index - new_rows_before['event'] = self - new_rows_before['event_date'] = self.sim.date - new_rows_before['when'] = 'Before' - new_rows_after = df_after.loc[indices] - new_rows_after['person_ID'] = new_rows_after.index - new_rows_after['event'] = self - new_rows_after['event_date'] = self.sim.date - new_rows_after['when'] = 'After' - - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) + print("Error, I shouldn't be here") + exit(-1) + return updated_appt_footprint def get_consumables( diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 18c1987483..1c12e7162b 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2776,7 +2776,7 @@ class RTIPollingEvent(RegularEvent, PopulationScopeEventMixin): def __init__(self, module): """Schedule to take place every month """ - super().__init__(module, frequency=DateOffset(months=1)) + super().__init__(module, frequency=DateOffset(months=1000)) p = module.parameters # Parameters which transition the model between states self.base_1m_prob_rti = (p['base_rate_injrti'] / 12) @@ -2864,9 +2864,13 @@ def apply(self, population): .when('.between(70,79)', self.rr_injrti_age7079), Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol) ) - pred = eq.predict(df.loc[rt_current_non_ind]) + if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True: + pred = 1 + else: + pred = eq.predict(df.loc[rt_current_non_ind]) random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) selected_for_rti = rt_current_non_ind[pred > random_draw_in_rti] + # Update to say they have been involved in a rti df.loc[selected_for_rti, 'rt_road_traffic_inc'] = True # Set the date that people were injured to now diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 42a2a288d3..a8ecf14cc6 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -281,7 +281,7 @@ def make_initial_population(self, *, n: int) -> None: data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) + self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint']) end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") @@ -303,8 +303,8 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None: # Eventually this can be made an option self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user - self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']] - self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] + self.generate_event_chains_modules_of_interest = [self.modules['RTI']] + self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] else: # If not using to print chains, cannot ignore epi self.generate_event_chains_overwrite_epi = False diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index 8dd92513f9..af3c4f0ae9 100644 --- a/tests/test_data_generation.py +++ b/tests/test_data_generation.py @@ -25,15 +25,16 @@ depression, tb, contraception, -# simplified_births, + simplified_births, + rti, symptommanager, ) from tlo.methods.hsi_generic_first_appts import HSI_GenericEmergencyFirstAppt # create simulation parameters start_date = Date(2010, 1, 1) -end_date = Date(2014, 1, 1) -popsize = 100 +end_date = Date(2012, 1, 1) +popsize = 200 @pytest.mark.slow def test_data_harvesting(seed): @@ -41,7 +42,7 @@ def test_data_harvesting(seed): This test runs a simulation to print all individual events of specific individuals """ - module_of_interest = 'Hiv' + module_of_interest = 'RTI' # create sim object sim = create_basic_sim(popsize, seed) @@ -55,29 +56,31 @@ def test_data_harvesting(seed): # run simulation sim.simulate(end_date=end_date, generate_event_chains = True) - + exit(-1) def create_basic_sim(population_size, seed): # create the basic outline of an rti simulation object sim = Simulation(start_date=start_date, seed=seed) resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' sim.register(demography.Demography(resourcefilepath=resourcefilepath), - contraception.Contraception(resourcefilepath=resourcefilepath), + # contraception.Contraception(resourcefilepath=resourcefilepath), enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), healthburden.HealthBurden(resourcefilepath=resourcefilepath), symptommanager.SymptomManager(resourcefilepath=resourcefilepath), healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']), + rti.RTI(resourcefilepath=resourcefilepath), healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath), - epi.Epi(resourcefilepath=resourcefilepath), - hiv.Hiv(resourcefilepath=resourcefilepath), - tb.Tb(resourcefilepath=resourcefilepath), + simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + # epi.Epi(resourcefilepath=resourcefilepath), + # hiv.Hiv(resourcefilepath=resourcefilepath), + # tb.Tb(resourcefilepath=resourcefilepath), cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath), depression.Depression(resourcefilepath=resourcefilepath), - newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), - pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), - care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), - labour.Labour(resourcefilepath=resourcefilepath), - postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), + # newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), + # pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), + # care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), + # labour.Labour(resourcefilepath=resourcefilepath), + #postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), ) sim.make_initial_population(n=population_size) From c7bd9d058cea79fad0f8471830766f5c335a7df1 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:57:21 +0100 Subject: [PATCH 10/54] Only store change in individual properties, not entire property row. Log changes to logger. --- src/tlo/events.py | 204 ++++++++++++++++++++++++++-------- src/tlo/methods/hsi_event.py | 134 ++++++++++++++++------ src/tlo/simulation.py | 2 +- tests/test_data_generation.py | 22 ++-- 4 files changed, 268 insertions(+), 94 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 76e1b9a117..436a01a97c 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -4,13 +4,20 @@ from enum import Enum from typing import TYPE_CHECKING -from tlo import DateOffset +from tlo import DateOffset, logging if TYPE_CHECKING: from tlo import Simulation import pandas as pd +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +logger_summary = logging.getLogger(f"{__name__}.summary") +logger_summary.setLevel(logging.INFO) + +debug_chains = True class Priority(Enum): """Enumeration for the Priority, which is used in sorting the events in the simulation queue.""" @@ -62,66 +69,167 @@ def apply(self, target): :param target: the target of the event """ raise NotImplementedError - - def run(self): - """Make the event happen.""" + def compare_population_dataframe(self,df_before, df_after): + """ This function compares the population dataframe before/after a population-wide event has occurred. + It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. """ + + # Create a mask of where values are different + diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) + + # Create an empty list to store changes for each of the individuals + chain_links = {} + + # Loop through each row of the mask + for idx, row in diff_mask.iterrows(): + changed_cols = row.index[row].tolist() + + if changed_cols: # Proceed only if there are changes in the row + + # Create a dictionary for this person + # First add event info + link_info = { + #'person_ID': idx, + 'event': str(self), + 'event_date': self.sim.date, + } + + # Store the new values from df_after for the changed columns + for col in changed_cols: + link_info[col] = df_after.at[idx, col] + + + # Append the event and changes to the individual key + chain_links = {idx : link_info} + + return chain_links + + def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame]: + """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ + + # Initialise these variables print_chains = False df_before = [] + row_before = pd.Series() - if self.sim.generate_event_chains: - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - - print_chains = True - if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and .. + if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + + # Will eventually use this once I can actually GET THE NAME OF THE SELF + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + + print_chains = True + + # Target is single individual + if self.target != self.sim.population: + # Save row for comparison after event has occurred + row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + + if debug_chains: + # Print entire row + row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' - row['appt_footprint'] = 'N/A' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) - else: - df_before = self.sim.population.props.copy() - - self.apply(self.target) - self.post_apply_hook() + else: + # This will be a population-wide event. In order to find individuals for which this led to + # a meaningful change, make a copy of the pop dataframe before the event has occurred. + df_before = self.sim.population.props.copy() + + return print_chains, row_before, df_before + + def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> dict: + """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ + + chain_links = {} + if print_chains: + + # Target is single individual if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] - row['person_ID'] = self.target - row['event'] = str(self) - row['event_date'] = self.sim.date - row['when'] = 'After' - row['appt_footprint'] = 'N/A' - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) + + # Create and store event for this individual + link_info = { + #'person_ID' : self.target, + 'event' : str(self), + 'event_date' : self.sim.date, + } + # Store property changes as a result of the event for this individual + for key in row_before.index: + if row_before[key] != row_after[key]: # Note: used fillna previously + link_info[key] = row_after[key] + + chain_links = {self.target : link_info} + + if debug_chains: + # Print entire row + row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births + row['person_ID'] = self.target + row['event'] = str(self) + row['event_date'] = self.sim.date + row['when'] = 'After' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + else: - df_after = self.sim.population.props.copy() - change = df_before.compare(df_after) - if ~change.empty: - indices = change.index - new_rows_before = df_before.loc[indices] - new_rows_before['person_ID'] = new_rows_before.index - new_rows_before['event'] = self - new_rows_before['event_date'] = self.sim.date - new_rows_before['when'] = 'Before' - new_rows_before['appt_footprint'] = 'N/A' - - new_rows_after = df_after.loc[indices] - new_rows_after['person_ID'] = new_rows_after.index - new_rows_after['event'] = self - new_rows_after['event_date'] = self.sim.date - new_rows_after['when'] = 'After' - new_rows_after['appt_footprint'] = 'N/A' - - - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) + # Target is entire population. Identify individuals for which properties have changed + # and store their changes. + + # Population frame after event + df_after = self.sim.population.props + + # Create and store the event and dictionary of changes for affected individuals + chain_links = self.compare_population_dataframe(df_before, df_after) + + if debug_chains: + # Or print entire rows + change = df_before.compare(df_after) + if not change.empty: + indices = change.index + new_rows_before = df_before.loc[indices] + new_rows_before['person_ID'] = new_rows_before.index + new_rows_before['event'] = self + new_rows_before['event_date'] = self.sim.date + new_rows_before['when'] = 'Before' + + new_rows_after = df_after.loc[indices] + new_rows_after['person_ID'] = new_rows_after.index + new_rows_after['event'] = self + new_rows_after['event_date'] = self.sim.date + new_rows_after['when'] = 'After' + + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) + + return chain_links + + def run(self): + """Make the event happen.""" + + # Collect relevant information before event takes place + if self.sim.generate_event_chains: + print_chains, row_before, df_before = self.store_chains_to_do_before_event() + + self.apply(self.target) + self.post_apply_hook() + + # Collect event info + meaningful property changes of individuals. Combined, these will constitute a 'link' + # in the individual's event chain. + if self.sim.generate_event_chains: + chain_links = self.store_chains_to_do_after_event(print_chains, row_before, df_before) + + # Log chain_links here + if len(chain_links)>0: + logger.info(key='event_chains', + data= chain_links, + description='Links forming chains of events for simulated individuals') + + #print("Chain events ", chain_links) + class RegularEvent(Event): diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index f8e8738543..1c727f014b 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -16,12 +16,19 @@ from tlo import Module, Simulation from tlo.methods.healthsystem import HealthSystem +# Pointing to the logger in events +logger_chains = logging.getLogger("tlo.methods.event") +logger_chains.setLevel(logging.INFO) + logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) logger_summary = logging.getLogger(f"{__name__}.summary") logger_summary.setLevel(logging.INFO) +debug_chains = True + + # Declare the level which will be used to represent the merging of levels '1b' and '2' LABEL_FOR_MERGED_FACILITY_LEVELS_1B_AND_2 = "2" @@ -187,54 +194,113 @@ def _run_after_hsi_event(self) -> None: item_codes=self._EQUIPMENT, facility_id=self.facility_info.id ) - - def run(self, squeeze_factor): - """Make the event happen.""" + + def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: + """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ + # Initialise these variables print_chains = False - df_before = [] - - if self.sim.generate_event_chains: - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): -# if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - - print_chains = True - if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] - row['person_ID'] = self.target - row['event'] = str(self) - row['event_date'] = self.sim.date - row['when'] = 'Before' - row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) - else: - df_before = self.sim.population.props.copy() - - updated_appt_footprint = self.apply(self.target, squeeze_factor) - self.post_apply_hook() - self._run_after_hsi_event() + row_before = pd.Series() - footprint = self.EXPECTED_APPT_FOOTPRINT - if updated_appt_footprint is not None: - footprint = updated_appt_footprint + # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore + # if (self.module in self.sim.generate_event_chains_modules_of_interest) and + if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - if print_chains: + # Will eventually use this once I can actually GET THE NAME OF THE SELF + # if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + + # In the case of HSI events, only individual events should exist and therefore be logged + print_chains = True + + # Save row for comparison after event has occurred + row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + + row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date - row['when'] = 'After' - row['appt_footprint'] = str(footprint) + row['when'] = 'Before' + row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) + row['level'] = self.facility_info.level self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + else: + # Many of our HealthSystem implementations rely on the assumption that print("Error, I shouldn't be here") exit(-1) + + return print_chains, row_before + + def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> dict: + """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ + if print_chains: + # For HSI event, this will only ever occur for individual events + + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) + + # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level + # will be stored regardless of whether individual experienced property changes. + + # Add event details + link_info = { + 'event' : str(self), + 'event_date' : self.sim.date, + 'appt_footprint' : str(footprint), + 'level' : self.facility_info.level, + } + + # Add changes to properties + for key in row_before.index: + if row_before[key] != row_after[key]: # Note: used fillna previously + link_info[key] = row_after[key] + + chain_links = {self.target : link_info} + + # Print entire row + row = self.sim.population.props.loc[[abs(self.target)]] + row['person_ID'] = self.target + row['event'] = str(self) + row['event_date'] = self.sim.date + row['when'] = 'After' + row['appt_footprint'] = footprint + row['level'] = self.facility_info.level + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + + return chain_links + + + def run(self, squeeze_factor): + """Make the event happen.""" + + + if self.sim.generate_event_chains: + print_chains, row_before = self.store_chains_to_do_before_event() + + footprint = self.EXPECTED_APPT_FOOTPRINT + updated_appt_footprint = self.apply(self.target, squeeze_factor) + self.post_apply_hook() + self._run_after_hsi_event() + + + if self.sim.generate_event_chains: + + # If the footprint has been updated when the event ran, change it here + if updated_appt_footprint is not None: + footprint = updated_appt_footprint + + chain_links = self.store_chains_to_do_after_event(print_chains, row_before, str(footprint)) + + if len(chain_links)>0: + logger_chains.info(key='event_chains', + data = chain_links, + description='Links forming chains of events for simulated individuals') + #print(chain_links) + return updated_appt_footprint + def get_consumables( self, diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index a8ecf14cc6..20b3a4898f 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -281,7 +281,7 @@ def make_initial_population(self, *, n: int) -> None: data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint']) + self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level']) end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index af3c4f0ae9..39f2b022aa 100644 --- a/tests/test_data_generation.py +++ b/tests/test_data_generation.py @@ -33,7 +33,7 @@ # create simulation parameters start_date = Date(2010, 1, 1) -end_date = Date(2012, 1, 1) +end_date = Date(2011, 1, 1) popsize = 200 @pytest.mark.slow @@ -63,24 +63,24 @@ def create_basic_sim(population_size, seed): sim = Simulation(start_date=start_date, seed=seed) resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' sim.register(demography.Demography(resourcefilepath=resourcefilepath), - # contraception.Contraception(resourcefilepath=resourcefilepath), + contraception.Contraception(resourcefilepath=resourcefilepath), enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), healthburden.HealthBurden(resourcefilepath=resourcefilepath), symptommanager.SymptomManager(resourcefilepath=resourcefilepath), healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']), rti.RTI(resourcefilepath=resourcefilepath), healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath), - simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), - # epi.Epi(resourcefilepath=resourcefilepath), - # hiv.Hiv(resourcefilepath=resourcefilepath), - # tb.Tb(resourcefilepath=resourcefilepath), + # simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + epi.Epi(resourcefilepath=resourcefilepath), + hiv.Hiv(resourcefilepath=resourcefilepath), + tb.Tb(resourcefilepath=resourcefilepath), cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath), depression.Depression(resourcefilepath=resourcefilepath), - # newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), - # pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), - # care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), - # labour.Labour(resourcefilepath=resourcefilepath), - #postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), + newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), + pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), + care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), + labour.Labour(resourcefilepath=resourcefilepath), + postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), ) sim.make_initial_population(n=population_size) From 769aaeca44aaedc324bd3da2f5f338bb47e02106 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 11 Oct 2024 17:03:22 +0100 Subject: [PATCH 11/54] Style fixes --- src/tlo/methods/tb.py | 2 +- src/tlo/simulation.py | 4 ++-- tests/test_data_generation.py | 5 ----- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 4c170944d2..9dc05ff301 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -1393,7 +1393,7 @@ def apply(self, population): & (df.tb_inf != "active") ].index - n_susceptible = len(susc_idx) + len(susc_idx) middle_index = len(susc_idx) // 2 diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 20b3a4898f..75dfa76429 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -8,7 +8,7 @@ import time from collections import OrderedDict from pathlib import Path -from typing import Dict, Optional, Union +from typing import Optional from typing import TYPE_CHECKING, Optional import pandas as pd @@ -374,7 +374,7 @@ def run_simulation_to(self, *, to_date: Date) -> None: :param to_date: Date to simulate up to but not including - must be before or equal to simulation end date specified in call to :py:meth:`initialise`. """ - f = open('output.txt', mode='a') + open('output.txt', mode='a') if not self._initialised: msg = "Simulation must be initialised before calling run_simulation_to" diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index 39f2b022aa..c94618a77d 100644 --- a/tests/test_data_generation.py +++ b/tests/test_data_generation.py @@ -1,7 +1,6 @@ import os from pathlib import Path -import pandas as pd import pytest from tlo import Date, Simulation @@ -11,7 +10,6 @@ depression, enhanced_lifestyle, epi, - epilepsy, healthburden, healthseekingbehaviour, healthsystem, @@ -20,16 +18,13 @@ labour, newborn_outcomes, postnatal_supervisor, - pregnancy_helper_functions, pregnancy_supervisor, depression, tb, contraception, - simplified_births, rti, symptommanager, ) -from tlo.methods.hsi_generic_first_appts import HSI_GenericEmergencyFirstAppt # create simulation parameters start_date = Date(2010, 1, 1) From 757cee36b0ae611f1f7ae31d25799fc0d6e7daa1 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Sun, 13 Oct 2024 11:15:17 +0100 Subject: [PATCH 12/54] Include printing of individual properties at the beginning and at birth, label what is only used for ddebugging and will be later removed --- src/tlo/events.py | 5 +++-- src/tlo/methods/hsi_event.py | 7 ++++--- src/tlo/methods/rti.py | 2 +- src/tlo/simulation.py | 28 ++++++++++++++++++++++++++++ tests/test_data_generation.py | 5 ++--- 5 files changed, 38 insertions(+), 9 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 436a01a97c..03bf7c72fa 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -98,7 +98,6 @@ def compare_population_dataframe(self,df_before, df_after): for col in changed_cols: link_info[col] = df_after.at[idx, col] - # Append the event and changes to the individual key chain_links = {idx : link_info} @@ -127,7 +126,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) if debug_chains: - # Print entire row + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) @@ -166,6 +165,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> chain_links = {self.target : link_info} + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. if debug_chains: # Print entire row row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births @@ -185,6 +185,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> # Create and store the event and dictionary of changes for affected individuals chain_links = self.compare_population_dataframe(df_before, df_after) + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. if debug_chains: # Or print entire rows change = df_before.compare(df_after) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 1c727f014b..0c3bc16072 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -217,6 +217,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) @@ -228,8 +229,8 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: else: # Many of our HealthSystem implementations rely on the assumption that - print("Error, I shouldn't be here") - exit(-1) + raise RuntimeError("Cannot have population-wide HSI events") + return print_chains, row_before @@ -258,7 +259,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> chain_links = {self.target : link_info} - # Print entire row + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 1c12e7162b..3642365976 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2865,7 +2865,7 @@ def apply(self, population): Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol) ) if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True: - pred = 1 + pred = 1.0 else: pred = eq.predict(df.loc[rt_current_non_ind]) random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 75dfa76429..582fb4ba1c 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -37,6 +37,9 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) +logger_chains = logging.getLogger("tlo.methods.event") +logger_chains.setLevel(logging.INFO) + class SimulationPreviouslyInitialisedError(Exception): """Exception raised when trying to initialise an already initialised simulation.""" @@ -111,6 +114,8 @@ def __init__( self.end_date = None self.output_file = None self.population: Optional[Population] = None + + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. self.event_chains: Optinoal[Population] = None self.show_progress_bar = show_progress_bar @@ -281,7 +286,16 @@ def make_initial_population(self, *, n: int) -> None: data=f"{module.name}.initialise_population() {time.time() - start1} s", ) + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level']) + + # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. + # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. + if self.generate_event_chains: + pop_dict = self.population.props.to_dict(orient='index') + logger_chains.info(key='event_chains', + data = pop_dict, + description='Links forming chains of events for simulated individuals') end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") @@ -392,6 +406,8 @@ def run_simulation_to(self, *, to_date: Date) -> None: self._update_progress_bar(progress_bar, date) self.fire_single_event(event, date) self.date = to_date + + # TO BE REMOVED: this is currently only used for debugging, will be removed from final PR. self.event_chains.to_csv('output.csv', index=False) if self.show_progress_bar: @@ -449,13 +465,25 @@ def do_birth(self, mother_id: int) -> int: child_id = self.population.do_birth() for module in self.modules.values(): module.on_birth(mother_id, child_id) + if self.generate_event_chains: + # When individual is born, store their initial properties to provide a starting point to the chain of property + # changes that this individual will undergo as a result of events taking place. + prop_dict = self.population.props.loc[child_id].to_dict() + + child_dict = {child_id : prop_dict} + logger_chains.info(key='event_chains', + data = child_dict, + description='Links forming chains of events for simulated individuals') + + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.population.props.iloc[[child_id]] row['person_ID'] = child_id row['event'] = 'Birth' row['event_date'] = self.date row['when'] = 'After' self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + return child_id def find_events_for_person(self, person_id: int) -> list[tuple[Date, Event]]: diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index c94618a77d..d9885c1fab 100644 --- a/tests/test_data_generation.py +++ b/tests/test_data_generation.py @@ -28,8 +28,8 @@ # create simulation parameters start_date = Date(2010, 1, 1) -end_date = Date(2011, 1, 1) -popsize = 200 +end_date = Date(2012, 1, 1) +popsize = 100 @pytest.mark.slow def test_data_harvesting(seed): @@ -51,7 +51,6 @@ def test_data_harvesting(seed): # run simulation sim.simulate(end_date=end_date, generate_event_chains = True) - exit(-1) def create_basic_sim(population_size, seed): # create the basic outline of an rti simulation object From 22a5e44312ad4d2f1d955b70399ae9569efb13c0 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:00:22 +0100 Subject: [PATCH 13/54] Log everything to simulation, as events logger doesn't seem to be visible to all modules. For now add person_ID to the dict of info printed as the outer dictionary key logging seems to have a problem. --- src/tlo/events.py | 13 +++++++++---- src/tlo/methods/hsi_event.py | 3 ++- src/tlo/simulation.py | 25 +++++++++++++++++-------- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 03bf7c72fa..98832faecb 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,9 +11,13 @@ import pandas as pd + logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) +logger_chain = logging.getLogger('tlo.simulation') +logger_chain.setLevel(logging.INFO) + logger_summary = logging.getLogger(f"{__name__}.summary") logger_summary.setLevel(logging.INFO) @@ -89,7 +93,7 @@ def compare_population_dataframe(self,df_before, df_after): # Create a dictionary for this person # First add event info link_info = { - #'person_ID': idx, + 'person_ID': idx, 'event': str(self), 'event_date': self.sim.date, } @@ -152,13 +156,14 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> if self.target != self.sim.population: row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) - # Create and store event for this individual + # Create and store event for this individual, regardless of whether any property change occurred link_info = { #'person_ID' : self.target, + 'person_ID' : self.target, 'event' : str(self), 'event_date' : self.sim.date, } - # Store property changes as a result of the event for this individual + # Store (if any) property changes as a result of the event for this individual for key in row_before.index: if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] @@ -225,7 +230,7 @@ def run(self): # Log chain_links here if len(chain_links)>0: - logger.info(key='event_chains', + logger_chain.info(key='event_chains', data= chain_links, description='Links forming chains of events for simulated individuals') diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 0c3bc16072..6651a8704a 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -17,7 +17,7 @@ from tlo.methods.healthsystem import HealthSystem # Pointing to the logger in events -logger_chains = logging.getLogger("tlo.methods.event") +logger_chains = logging.getLogger("tlo.simulation") logger_chains.setLevel(logging.INFO) logger = logging.getLogger(__name__) @@ -246,6 +246,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> # Add event details link_info = { + 'person_ID': self.target, 'event' : str(self), 'event_date' : self.sim.date, 'appt_footprint' : str(footprint), diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 582fb4ba1c..fd9fade215 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -107,7 +107,7 @@ def __init__( self.date = self.start_date = start_date self.modules = OrderedDict() self.event_queue = EventQueue() - self.generate_event_chains = None + self.generate_event_chains = True self.generate_event_chains_overwrite_epi = None self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] @@ -292,15 +292,23 @@ def make_initial_population(self, *, n: int) -> None: # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. if self.generate_event_chains: + pop_dict = self.population.props.to_dict(orient='index') - logger_chains.info(key='event_chains', + + print(pop_dict) + print(pop_dict.keys()) + for key in pop_dict.keys(): + pop_dict[key]['person_ID'] = key + print("Length of properties", len(pop_dict[0].keys())) + #exit(-1) + logger.info(key='event_chains', data = pop_dict, description='Links forming chains of events for simulated individuals') end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") - def initialise(self, *, end_date: Date, generate_event_chains) -> None: + def initialise(self, *, end_date: Date) -> None: """Initialise all modules in simulation. :param end_date: Date to end simulation on - accessible to modules to allow initialising data structures which may depend (in size for example) on the @@ -312,7 +320,7 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None: self.date = self.start_date self.end_date = end_date # store the end_date so that others can reference it - self.generate_event_chains = generate_event_chains + #self.generate_event_chains = generate_event_chains if self.generate_event_chains: # Eventually this can be made an option self.generate_event_chains_overwrite_epi = True @@ -413,7 +421,7 @@ def run_simulation_to(self, *, to_date: Date) -> None: if self.show_progress_bar: progress_bar.stop() - def simulate(self, *, end_date: Date, generate_event_chains=False) -> None: + def simulate(self, *, end_date: Date) -> None: """Simulate until the given end date :param end_date: When to stop simulating. Only events strictly before this @@ -421,7 +429,7 @@ def simulate(self, *, end_date: Date, generate_event_chains=False) -> None: clarity. """ start = time.time() - self.initialise(end_date=end_date, generate_event_chains=generate_event_chains) + self.initialise(end_date=end_date) self.run_simulation_to(to_date=end_date) self.finalise(time.time() - start) @@ -470,9 +478,10 @@ def do_birth(self, mother_id: int) -> int: # When individual is born, store their initial properties to provide a starting point to the chain of property # changes that this individual will undergo as a result of events taking place. prop_dict = self.population.props.loc[child_id].to_dict() - + prop_dict['event'] = 'Birth' + prop_dict['event_date'] = self.date child_dict = {child_id : prop_dict} - logger_chains.info(key='event_chains', + logger.info(key='event_chains', data = child_dict, description='Links forming chains of events for simulated individuals') From 7faa81783dc43e434e26ef8c95717480cebd3816 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 18 Oct 2024 10:07:46 +0200 Subject: [PATCH 14/54] Consider all modules included as of interest --- src/tlo/simulation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index fd9fade215..15be1622e8 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -325,7 +325,7 @@ def initialise(self, *, end_date: Date) -> None: # Eventually this can be made an option self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user - self.generate_event_chains_modules_of_interest = [self.modules['RTI']] + self.generate_event_chains_modules_of_interest = [self.modules] self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] else: # If not using to print chains, cannot ignore epi From 7232f976831054ed541d59d8da20c91289fa79e6 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:29:43 +0200 Subject: [PATCH 15/54] Remove pop-wide HSI warning and make epi default even when printing chains --- src/tlo/methods/hsi_event.py | 38 ++++++++++++++++++++++++++---------- src/tlo/simulation.py | 2 +- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 6651a8704a..d0cdb5bbdd 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -223,13 +223,23 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' - row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) - row['level'] = self.facility_info.level + try: + row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) + row['level'] = self.facility_info.level + except: + row['appt_footprint'] = 'N/A' + row['level'] = 'N/A' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: - # Many of our HealthSystem implementations rely on the assumption that - raise RuntimeError("Cannot have population-wide HSI events") + # Once this has been removed from Chronic Syndrome mock module, make this a Runtime Error + # raise RuntimeError("Cannot have population-wide HSI events") + logger.debug( + key="message", + data=( + f"Cannot have population-wide HSI events" + ), + ) return print_chains, row_before @@ -245,12 +255,20 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> # will be stored regardless of whether individual experienced property changes. # Add event details + + try: + record_footprint = str(footprint) + record_level = self.facility_info.level + except: + record_footprint = 'N/A' + record_level = 'N/A' + link_info = { 'person_ID': self.target, 'event' : str(self), 'event_date' : self.sim.date, - 'appt_footprint' : str(footprint), - 'level' : self.facility_info.level, + 'appt_footprint' : record_footprint, + 'level' : record_level, } # Add changes to properties @@ -266,8 +284,8 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' - row['appt_footprint'] = footprint - row['level'] = self.facility_info.level + row['appt_footprint'] = record_footprint + row['level'] = record_level self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) return chain_links @@ -277,7 +295,7 @@ def run(self, squeeze_factor): """Make the event happen.""" - if self.sim.generate_event_chains: + if self.sim.generate_event_chains and self.target != self.sim.population: print_chains, row_before = self.store_chains_to_do_before_event() footprint = self.EXPECTED_APPT_FOOTPRINT @@ -287,7 +305,7 @@ def run(self, squeeze_factor): self._run_after_hsi_event() - if self.sim.generate_event_chains: + if self.sim.generate_event_chains and self.target != self.sim.population: # If the footprint has been updated when the event ran, change it here if updated_appt_footprint is not None: diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 15be1622e8..0c70b164d9 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -323,7 +323,7 @@ def initialise(self, *, end_date: Date) -> None: #self.generate_event_chains = generate_event_chains if self.generate_event_chains: # Eventually this can be made an option - self.generate_event_chains_overwrite_epi = True + self.generate_event_chains_overwrite_epi = False # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules] self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] From a6def2d22c0d291ce775afef561b580847ad36cf Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:39:24 +0200 Subject: [PATCH 16/54] Style fix --- src/tlo/methods/hsi_event.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index d0cdb5bbdd..041ab9cf08 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -237,7 +237,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: logger.debug( key="message", data=( - f"Cannot have population-wide HSI events" + "Cannot have population-wide HSI events" ), ) From ecea532a2843d312580accf97383cd62c457fd04 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:51:39 +0200 Subject: [PATCH 17/54] Remove data generation test, which wasn't really a test --- tests/test_data_generation.py | 82 ----------------------------------- 1 file changed, 82 deletions(-) delete mode 100644 tests/test_data_generation.py diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py deleted file mode 100644 index d9885c1fab..0000000000 --- a/tests/test_data_generation.py +++ /dev/null @@ -1,82 +0,0 @@ -import os -from pathlib import Path - -import pytest - -from tlo import Date, Simulation -from tlo.methods import ( - care_of_women_during_pregnancy, - demography, - depression, - enhanced_lifestyle, - epi, - healthburden, - healthseekingbehaviour, - healthsystem, - hiv, - cardio_metabolic_disorders, - labour, - newborn_outcomes, - postnatal_supervisor, - pregnancy_supervisor, - depression, - tb, - contraception, - rti, - symptommanager, -) - -# create simulation parameters -start_date = Date(2010, 1, 1) -end_date = Date(2012, 1, 1) -popsize = 100 - -@pytest.mark.slow -def test_data_harvesting(seed): - """ - This test runs a simulation to print all individual events of specific individuals - """ - - module_of_interest = 'RTI' - # create sim object - sim = create_basic_sim(popsize, seed) - - dependencies_list = sim.modules[module_of_interest].ADDITIONAL_DEPENDENCIES.union(sim.modules[module_of_interest].INIT_DEPENDENCIES) - - # Check that all dependencies are included - for dep in dependencies_list: - if dep not in sim.modules: - print("WARNING: dependency ", dep, "not included") - exit(-1) - - # run simulation - sim.simulate(end_date=end_date, generate_event_chains = True) - -def create_basic_sim(population_size, seed): - # create the basic outline of an rti simulation object - sim = Simulation(start_date=start_date, seed=seed) - resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' - sim.register(demography.Demography(resourcefilepath=resourcefilepath), - contraception.Contraception(resourcefilepath=resourcefilepath), - enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), - healthburden.HealthBurden(resourcefilepath=resourcefilepath), - symptommanager.SymptomManager(resourcefilepath=resourcefilepath), - healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']), - rti.RTI(resourcefilepath=resourcefilepath), - healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath), - # simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), - epi.Epi(resourcefilepath=resourcefilepath), - hiv.Hiv(resourcefilepath=resourcefilepath), - tb.Tb(resourcefilepath=resourcefilepath), - cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath), - depression.Depression(resourcefilepath=resourcefilepath), - newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), - pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), - care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), - labour.Labour(resourcefilepath=resourcefilepath), - postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), - ) - - sim.make_initial_population(n=population_size) - return sim - From ae7a44cb5f72063c48555e3b21d5d6dd4400ee97 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 23 Oct 2024 15:29:03 +0200 Subject: [PATCH 18/54] Change dict of properties to string in logging, and add analysis files --- .../analysis_extract_data.py | 370 ++++++++++++++++++ .../postprocess_events_chain.py | 156 ++++++++ .../scenario_generate_chains.py | 115 ++++++ src/tlo/events.py | 23 +- src/tlo/methods/hsi_event.py | 13 +- src/tlo/simulation.py | 29 +- 6 files changed, 684 insertions(+), 22 deletions(-) create mode 100644 src/scripts/analysis_data_generation/analysis_extract_data.py create mode 100644 src/scripts/analysis_data_generation/postprocess_events_chain.py create mode 100644 src/scripts/analysis_data_generation/scenario_generate_chains.py diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py new file mode 100644 index 0000000000..2cfba5315b --- /dev/null +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -0,0 +1,370 @@ +"""Produce plots to show the health impact (deaths, dalys) each the healthcare system (overall health impact) when +running under different MODES and POLICIES (scenario_impact_of_actual_vs_funded.py)""" + +# short tclose -> ideal case +# long tclose -> status quo +import argparse +from pathlib import Path +from typing import Tuple + +import pandas as pd + +from tlo import Date +from tlo.analysis.utils import extract_results +from datetime import datetime + +# Range of years considered +min_year = 2010 +max_year = 2040 + + +def all_columns(_df): + return pd.Series(_df.all()) + +def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ): + """Produce standard set of plots describing the effect of each TREATMENT_ID. + - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. + - We estimate the draw on healthcare system resources as the FEWER appointments when that treatment does not occur. + """ + pd.set_option('display.max_rows', None) + pd.set_option('display.max_colwidth', None) + event_chains = extract_results( + results_folder, + module='tlo.simulation', + key='event_chains', + column='0', + #column = str(i), + #custom_generate_series=get_num_dalys_by_year, + do_scaling=False + ) + # print(event_chains.loc[0,(0, 0)]) + + eval_env = { + 'datetime': datetime, # Add the datetime class to the eval environment + 'pd': pd, # Add pandas to handle Timestamp + 'Timestamp': pd.Timestamp, # Specifically add Timestamp for eval + 'NaT': pd.NaT, + 'nan': float('nan'), # Include NaN for eval (can also use pd.NA if preferred) + } + + for item,row in event_chains.iterrows(): + value = event_chains.loc[item,(0, 0)] + if value !='': + print('') + print(value) + exit(-1) + #dict = {} + #for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: + # dict[i] = [] + + #for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: + # event_chains = extract_results( + # results_folder, + # module='tlo.simulation'#, + # key='event_chains', + # column = str(i), + # #custom_generate_series=get_num_dalys_by_year, + # do_scaling=False + # ) + # print(event_chains) + # print(event_chains.index) + # print(event_chains.columns.levels) + + # for index, row in event_chains.iterrows(): + # if event_chains.iloc[index,0] is not None: + # if(event_chains.iloc[index,0]['person_ID']==i): #and 'event' in event_chains.iloc[index,0].keys()): + # dict[i].append(event_chains.iloc[index,0]) + #elif (event_chains.iloc[index,0]['person_ID']==i and 'event' not in event_chains.iloc[index,0].keys()): + #print(event_chains.iloc[index,0]['de_depr']) + # exit(-1) + #for item in dict[0]: + # print(item) + + #exit(-1) + + TARGET_PERIOD = (Date(min_year, 1, 1), Date(max_year, 1, 1)) + + # Definitions of general helper functions + lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 + + def target_period() -> str: + """Returns the target period as a string of the form YYYY-YYYY""" + return "-".join(str(t.year) for t in TARGET_PERIOD) + + def get_parameter_names_from_scenario_file() -> Tuple[str]: + """Get the tuple of names of the scenarios from `Scenario` class used to create the results.""" + from scripts.healthsystem.impact_of_actual_vs_funded.scenario_impact_of_actual_vs_funded import ( + ImpactOfHealthSystemMode, + ) + e = ImpactOfHealthSystemMode() + return tuple(e._scenarios.keys()) + + def get_num_deaths(_df): + """Return total number of Deaths (total within the TARGET_PERIOD) + """ + return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)])) + + def get_num_dalys(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + def get_num_dalys_by_cause(_df): + """Return number of DALYs by cause by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + + def set_param_names_as_column_index_level_0(_df): + """Set the columns index (level 0) as the param_names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] + assert len(names_of_cols_level0) == len(_df.columns.levels[0]) + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df + + def find_difference_relative_to_comparison(_ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. + The comparison is `X - COMPARISON`.""" + return _ser \ + .unstack(level=0) \ + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ + .drop(columns=([comparison] if drop_comparison else [])) \ + .stack() + + + def get_counts_of_hsi_by_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + + year_target = 2023 + def get_counts_of_hsi_by_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).dt.year ==year_target, 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + + def get_counts_of_hsi_by_short_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + def get_counts_of_hsi_by_short_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id_by_year(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + + # Obtain parameter names for this scenario file + param_names = get_parameter_names_from_scenario_file() + print(param_names) + + # ================================================================================================ + # TIME EVOLUTION OF TOTAL DALYs + # Plot DALYs averted compared to the ``No Policy'' policy + + year_target = 2023 # This global variable will be passed to custom function + def get_num_dalys_by_year(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + this_min_year = 2010 + for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'index_original']) + concatenated_df = concatenated_df.reset_index(level='index_original',drop=True) + dalys_by_year = concatenated_df + print(dalys_by_year) + dalys_by_year.to_csv('ConvertedOutputs/Total_DALYs_with_time.csv', index=True) + + # ================================================================================================ + # Print population under each scenario + pop_model = extract_results(results_folder, + module="tlo.methods.demography", + key="population", + column="total", + index="date", + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + pop_model.index = pop_model.index.year + pop_model = pop_model[(pop_model.index >= this_min_year) & (pop_model.index <= max_year)] + print(pop_model) + assert dalys_by_year.index.equals(pop_model.index) + assert all(dalys_by_year.columns == pop_model.columns) + pop_model.to_csv('ConvertedOutputs/Population_with_time.csv', index=True) + + # ================================================================================================ + # DALYs BROKEN DOWN BY CAUSES AND YEAR + # DALYs by cause per year + # %% Quantify the health losses associated with all interventions combined. + + year_target = 2023 # This global variable will be passed to custom function + def get_num_dalys_by_year_and_cause(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + this_min_year = 2010 + for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_year_and_cause, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year #summarize(num_dalys_by_year) + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + + df_total = concatenated_df + df_total.to_csv('ConvertedOutputs/DALYS_by_cause_with_time.csv', index=True) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + for year in range(min_year, max_year+1): + year_target = year + + hsi_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_delivered_by_year + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + HSI_ran_by_year = concatenated_df + + del ALL + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + for year in range(min_year, max_year+1): + year_target = year + + hsi_not_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Never_ran_HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_not_delivered_by_year + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + HSI_never_ran_by_year = concatenated_df + + HSI_never_ran_by_year = HSI_never_ran_by_year.fillna(0) #clean_df( + HSI_ran_by_year = HSI_ran_by_year.fillna(0) + HSI_total_by_year = HSI_ran_by_year.add(HSI_never_ran_by_year, fill_value=0) + HSI_ran_by_year.to_csv('ConvertedOutputs/HSIs_ran_by_area_with_time.csv', index=True) + HSI_never_ran_by_year.to_csv('ConvertedOutputs/HSIs_never_ran_by_area_with_time.csv', index=True) + print(HSI_ran_by_year) + print(HSI_never_ran_by_year) + print(HSI_total_by_year) + +if __name__ == "__main__": + rfp = Path('resources') + + parser = argparse.ArgumentParser( + description="Produce plots to show the impact each set of treatments", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--output-path", + help=( + "Directory to write outputs to. If not specified (set to None) outputs " + "will be written to value of --results-path argument." + ), + type=Path, + default=None, + required=False, + ) + parser.add_argument( + "--resources-path", + help="Directory containing resource files", + type=Path, + default=Path('resources'), + required=False, + ) + parser.add_argument( + "--results-path", + type=Path, + help=( + "Directory containing results from running " + "src/scripts/analysis_data_generation/scenario_generate_chains.py " + ), + default=None, + required=False + ) + args = parser.parse_args() + assert args.results_path is not None + results_path = args.results_path + + output_path = results_path if args.output_path is None else args.output_path + + apply( + results_folder=results_path, + output_folder=output_path, + resourcefilepath=args.resources_path + ) diff --git a/src/scripts/analysis_data_generation/postprocess_events_chain.py b/src/scripts/analysis_data_generation/postprocess_events_chain.py new file mode 100644 index 0000000000..96c27a04b1 --- /dev/null +++ b/src/scripts/analysis_data_generation/postprocess_events_chain.py @@ -0,0 +1,156 @@ +import pandas as pd +from dateutil.relativedelta import relativedelta + +# Remove from every individual's event chain all events that were fired after death +def cut_off_events_after_death(df): + + events_chain = df.groupby('person_ID') + + filtered_data = pd.DataFrame() + + for name, group in events_chain: + + # Find the first non-NaN 'date_of_death' and its index + first_non_nan_index = group['date_of_death'].first_valid_index() + + if first_non_nan_index is not None: + # Filter out all rows after the first non-NaN index + filtered_group = group.loc[:first_non_nan_index] # Keep rows up to and including the first valid index + filtered_data = pd.concat([filtered_data, filtered_group]) + else: + # If there are no non-NaN values, keep the original group + filtered_data = pd.concat([filtered_data, group]) + + return filtered_data + +# Load into DataFrame +def load_csv_to_dataframe(file_path): + try: + # Load raw chains into df + df = pd.read_csv(file_path) + print("Raw event chains loaded successfully!") + return df + except FileNotFoundError: + print(f"Error: The file '{file_path}' was not found.") + except Exception as e: + print(f"An error occurred: {e}") + +file_path = 'output.csv' # Replace with the path to your CSV file + +output = load_csv_to_dataframe(file_path) + +# Some of the dates appeared not to be in datetime format. Correct here. +output['date_of_death'] = pd.to_datetime(output['date_of_death'], errors='coerce') +output['date_of_birth'] = pd.to_datetime(output['date_of_birth'], errors='coerce') +if 'hv_date_inf' in output.columns: + output['hv_date_inf'] = pd.to_datetime(output['hv_date_inf'], errors='coerce') + + +date_start = pd.to_datetime('2010-01-01') +if 'Other' in output['cause_of_death'].values: + print("ERROR: 'Other' was included in sim as possible cause of death") + exit(-1) + +# Choose which columns in individual properties to visualise +columns_to_print =['event','is_alive','hv_inf', 'hv_art','tb_inf', 'tb_date_active', 'event_date', 'when'] +#columns_to_print =['person_ID', 'date_of_birth', 'date_of_death', 'cause_of_death','hv_date_inf', 'hv_art','tb_inf', 'tb_date_active', 'event date', 'event'] + +# When checking which individuals led to *any* changes in individual properties, exclude these columns from comparison +columns_to_exclude_in_comparison = ['when', 'event', 'event_date', 'age_exact_years', 'age_years', 'age_days', 'age_range', 'level', 'appt_footprint'] + +# If considering epidemiology consistent with sim, add check here. +check_ages_of_those_HIV_inf = False +if check_ages_of_those_HIV_inf: + for index, row in output.iterrows(): + if pd.isna(row['hv_date_inf']): + continue # Skip this iteration + diff = relativedelta(output.loc[index, 'hv_date_inf'],output.loc[index, 'date_of_birth']) + if diff.years > 1 and diff.years<15: + print("Person contracted HIV infection at age younger than 15", diff) + +# Remove events after death +filtered_data = cut_off_events_after_death(output) + +print_raw_events = True # Print raw chain of events for each individual +print_selected_changes = False +print_all_changes = True +person_ID_of_interest = 494 + +pd.set_option('display.max_rows', None) + +for name, group in filtered_data.groupby('person_ID'): + list_of_dob = group['date_of_birth'] + + # Select individuals based on when they were born + if list_of_dob.iloc[0].year<2010: + + # Check that immutable properties are fixed for this individual, i.e. that events were collated properly: + all_identical_dob = group['date_of_birth'].nunique() == 1 + all_identical_sex = group['sex'].nunique() == 1 + if all_identical_dob is False or all_identical_sex is False: + print("Immutable properties are changing! This is not chain for single individual") + print(group) + exit(-1) + + print("----------------------------------------------------------------------") + print("person_ID ", group['person_ID'].iloc[0], "d.o.b ", group['date_of_birth'].iloc[0]) + print("Number of events for this individual ", group['person_ID'].iloc[0], "is :", len(group)/2) # Divide by 2 before printing Before/After for each event + number_of_events =len(group)/2 + number_of_changes=0 + if print_raw_events: + print(group) + + if print_all_changes: + # Check each row + comparison = group.drop(columns=columns_to_exclude_in_comparison).fillna(-99999).ne(group.drop(columns=columns_to_exclude_in_comparison).shift().fillna(-99999)) + + # Iterate over rows where any column has changed + for idx, row_changed in comparison.iloc[1:].iterrows(): + if row_changed.any(): # Check if any column changed in this row + number_of_changes+=1 + changed_columns = row_changed[row_changed].index.tolist() # Get the columns where changes occurred + print(f"Row {idx} - Changes detected in columns: {changed_columns}") + columns_output = ['event', 'event_date', 'appt_footprint', 'level'] + changed_columns + print(group.loc[idx, columns_output]) # Print only the changed columns + if group.loc[idx, 'when'] == 'Before': + print('-----> THIS CHANGE OCCURRED BEFORE EVENT!') + #print(group.loc[idx,columns_to_print]) + print() # For better readability + print("Number of changes is ", number_of_changes, "out of ", number_of_events, " events") + + if print_selected_changes: + tb_inf_condition = ( + ((group['tb_inf'].shift(1) == 'uninfected') & (group['tb_inf'] == 'active')) | + ((group['tb_inf'].shift(1) == 'latent') & (group['tb_inf'] == 'active')) | + ((group['tb_inf'].shift(1) == 'active') & (group['tb_inf'] == 'latent')) | + ((group['hv_inf'].shift(1) is False) & (group['hv_inf'] is True)) | + ((group['hv_art'].shift(1) == 'not') & (group['hv_art'] == 'on_not_VL_suppressed')) | + ((group['hv_art'].shift(1) == 'not') & (group['hv_art'] == 'on_VL_suppressed')) | + ((group['hv_art'].shift(1) == 'on_VL_suppressed') & (group['hv_art'] == 'on_not_VL_suppressed')) | + ((group['hv_art'].shift(1) == 'on_VL_suppressed') & (group['hv_art'] == 'not')) | + ((group['hv_art'].shift(1) == 'on_not_VL_suppressed') & (group['hv_art'] == 'on_VL_suppressed')) | + ((group['hv_art'].shift(1) == 'on_not_VL_suppressed') & (group['hv_art'] == 'not')) + ) + + alive_condition = ( + (group['is_alive'].shift(1) is True) & (group['is_alive'] is False) + ) + # Combine conditions for rows of interest + transition_condition = tb_inf_condition | alive_condition + + if list_of_dob.iloc[0].year >= 2010: + print("DETECTED OF INTEREST") + print(group[group['event'] == 'Birth'][columns_to_print]) + + # Filter the DataFrame based on the condition + filtered_transitions = group[transition_condition] + if not filtered_transitions.empty: + if list_of_dob.iloc[0].year < 2010: + print("DETECTED OF INTEREST") + print(filtered_transitions[columns_to_print]) + + +print("Number of individuals simulated ", filtered_data.groupby('person_ID').ngroups) + + + diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py new file mode 100644 index 0000000000..6bdcd02d90 --- /dev/null +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -0,0 +1,115 @@ +"""This Scenario file run the model to generate event chans + +Run on the batch system using: +``` +tlo batch-submit + src/scripts/analysis_data_generation/scenario_generate_chains.py +``` + +or locally using: +``` + tlo scenario-run src/scripts/analysis_data_generation/scenario_generate_chains.py +``` + +""" +from pathlib import Path +from typing import Dict + +import pandas as pd + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher +from tlo.scenario import BaseScenario + + +class GenerateDataChains(BaseScenario): + def __init__(self): + super().__init__() + self.seed = 0 + self.start_date = Date(2010, 1, 1) + self.end_date = self.start_date + pd.DateOffset(months=1) + self.pop_size = 120 + self._scenarios = self._get_scenarios() + self.number_of_draws = len(self._scenarios) + self.runs_per_draw = 1 + self.generate_event_chains = True + + def log_configuration(self): + return { + 'filename': 'generate_event_chains', + 'directory': Path('./outputs'), # <- (specified only for local running) + 'custom_levels': { + '*': logging.WARNING, + 'tlo.methods.demography': logging.INFO, + 'tlo.methods.events': logging.INFO, + 'tlo.methods.demography.detail': logging.WARNING, + 'tlo.methods.healthburden': logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + } + } + + def modules(self): + return ( + fullmodel(resourcefilepath=self.resources) + + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + ) + + def draw_parameters(self, draw_number, rng): + if draw_number < self.number_of_draws: + return list(self._scenarios.values())[draw_number] + else: + return + + # case 1: gfHE = -0.030, factor = 1.01074 + # case 2: gfHE = -0.020, factor = 1.02116 + # case 3: gfHE = -0.015, factor = 1.02637 + # case 4: gfHE = 0.015, factor = 1.05763 + # case 5: gfHE = 0.020, factor = 1.06284 + # case 6: gfHE = 0.030, factor = 1.07326 + + def _get_scenarios(self) -> Dict[str, Dict]: + """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. + """ + + self.YEAR_OF_CHANGE = 2019 + + return { + + # =========== STATUS QUO ============ + "Baseline": + mix_scenarios( + self._baseline(), + { + "HealthSystem": { + "yearly_HR_scaling_mode": "no_scaling", + }, + } + ), + + } + + def _baseline(self) -> Dict: + """Return the Dict with values for the parameter changes that define the baseline scenario. """ + return mix_scenarios( + get_parameters_for_status_quo(), + { + "HealthSystem": { + "mode_appt_constraints": 1, # <-- Mode 1 prior to change to preserve calibration + "mode_appt_constraints_postSwitch": 2, # <-- Mode 2 post-change to show effects of HRH + "year_mode_switch": self.YEAR_OF_CHANGE, + "scale_to_effective_capabilities": True, + "policy_name": "Naive", + "tclose_overwrite": 1, + "tclose_days_offset_overwrite": 7, + "use_funded_or_actual_staffing": "actual", + "cons_availability": "default", + } + }, + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/tlo/events.py b/src/tlo/events.py index 98832faecb..00a6fe4e7d 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,6 +11,8 @@ import pandas as pd +FACTOR_POP_DICT = 5000 + logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -83,13 +85,14 @@ def compare_population_dataframe(self,df_before, df_after): # Create an empty list to store changes for each of the individuals chain_links = {} - + len_of_diff = len(diff_mask) + # Loop through each row of the mask + for idx, row in diff_mask.iterrows(): changed_cols = row.index[row].tolist() - + if changed_cols: # Proceed only if there are changes in the row - # Create a dictionary for this person # First add event info link_info = { @@ -103,7 +106,7 @@ def compare_population_dataframe(self,df_before, df_after): link_info[col] = df_after.at[idx, col] # Append the event and changes to the individual key - chain_links = {idx : link_info} + chain_links[idx] = str(link_info) return chain_links @@ -168,7 +171,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] - chain_links = {self.target : link_info} + chain_links[self.target] = str(link_info) # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. if debug_chains: @@ -228,14 +231,18 @@ def run(self): if self.sim.generate_event_chains: chain_links = self.store_chains_to_do_after_event(print_chains, row_before, df_before) + # Create empty logger for entire pop + pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals + + pop_dict.update(chain_links) + # Log chain_links here if len(chain_links)>0: logger_chain.info(key='event_chains', - data= chain_links, - description='Links forming chains of events for simulated individuals') + data= pop_dict, + description='Links forming chains of events for simulated individuals') #print("Chain events ", chain_links) - class RegularEvent(Event): diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 041ab9cf08..d657e9d3a0 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -11,6 +11,8 @@ import pandas as pd +FACTOR_POP_DICT = 5000 + if TYPE_CHECKING: from tlo import Module, Simulation @@ -276,7 +278,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] - chain_links = {self.target : link_info} + chain_links = {self.target : str(link_info)} # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] @@ -314,10 +316,15 @@ def run(self, squeeze_factor): chain_links = self.store_chains_to_do_after_event(print_chains, row_before, str(footprint)) if len(chain_links)>0: + + pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} + # pop_dict = {i: '' for i in range(1000)} # Always include all possible individuals + + pop_dict.update(chain_links) + logger_chains.info(key='event_chains', - data = chain_links, + data = pop_dict, description='Links forming chains of events for simulated individuals') - #print(chain_links) return updated_appt_footprint diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 0c70b164d9..d9ba62c43a 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -40,6 +40,8 @@ logger_chains = logging.getLogger("tlo.methods.event") logger_chains.setLevel(logging.INFO) +FACTOR_POP_DICT = 5000 + class SimulationPreviouslyInitialisedError(Exception): """Exception raised when trying to initialise an already initialised simulation.""" @@ -294,17 +296,18 @@ def make_initial_population(self, *, n: int) -> None: if self.generate_event_chains: pop_dict = self.population.props.to_dict(orient='index') - - print(pop_dict) - print(pop_dict.keys()) for key in pop_dict.keys(): pop_dict[key]['person_ID'] = key - print("Length of properties", len(pop_dict[0].keys())) - #exit(-1) + pop_dict[key] = str(pop_dict[key]) # Log as string to avoid issues around length of properties stored later + + pop_dict_full = {i: '' for i in range(FACTOR_POP_DICT)} + pop_dict_full.update(pop_dict) + + print("Size for full sim", len(pop_dict_full)) + logger.info(key='event_chains', - data = pop_dict, + data = pop_dict_full, description='Links forming chains of events for simulated individuals') - end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") @@ -323,7 +326,7 @@ def initialise(self, *, end_date: Date) -> None: #self.generate_event_chains = generate_event_chains if self.generate_event_chains: # Eventually this can be made an option - self.generate_event_chains_overwrite_epi = False + self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules] self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] @@ -480,9 +483,13 @@ def do_birth(self, mother_id: int) -> int: prop_dict = self.population.props.loc[child_id].to_dict() prop_dict['event'] = 'Birth' prop_dict['event_date'] = self.date - child_dict = {child_id : prop_dict} + + pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals + pop_dict[child_id] = str(prop_dict) # Convert to string to avoid issue of length + + print("Length at birth", len(pop_dict)) logger.info(key='event_chains', - data = child_dict, + data = pop_dict, description='Links forming chains of events for simulated individuals') # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. @@ -492,7 +499,7 @@ def do_birth(self, mother_id: int) -> int: row['event_date'] = self.date row['when'] = 'After' self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) - + return child_id def find_events_for_person(self, person_id: int) -> list[tuple[Date, Event]]: From 16299a21f43862a188f41ea6117b81c2c11d72ab Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 25 Nov 2024 09:37:29 +0000 Subject: [PATCH 19/54] Include debugging option, final set-up of scenario to print data, analysis file now collects all relevant info and prints them --- .../analysis_extract_data.py | 157 ++++++++++++++++-- .../scenario_generate_chains.py | 53 +++++- src/tlo/events.py | 10 +- src/tlo/methods/hsi_event.py | 50 +++--- src/tlo/methods/rti.py | 17 +- src/tlo/simulation.py | 39 +++-- src/tlo/util.py | 1 + 7 files changed, 252 insertions(+), 75 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 2cfba5315b..6eb6408830 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -8,10 +8,14 @@ from typing import Tuple import pandas as pd +import matplotlib.pyplot as plt from tlo import Date from tlo.analysis.utils import extract_results from datetime import datetime +from collections import Counter +import ast + # Range of years considered min_year = 2010 @@ -28,17 +32,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No """ pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) - event_chains = extract_results( - results_folder, - module='tlo.simulation', - key='event_chains', - column='0', - #column = str(i), - #custom_generate_series=get_num_dalys_by_year, - do_scaling=False - ) - # print(event_chains.loc[0,(0, 0)]) - + eval_env = { 'datetime': datetime, # Add the datetime class to the eval environment 'pd': pd, # Add pandas to handle Timestamp @@ -46,13 +40,144 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No 'NaT': pd.NaT, 'nan': float('nan'), # Include NaN for eval (can also use pd.NA if preferred) } + + initial_properties_of_interest = ['rt_inj_severity','rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sex','li_urban', 'li_wealth', 'li_ex_alc', 'li_exposed_to_campaign_alcohol_reduction', 'li_mar_stat', 'li_in_ed', 'li_ed_lev'] + + # Will be added through computation: age at time of RTI + + # Will be added through computation: total duration of event + + initial_rt_event_properties = set() + + num_individuals = 1000 + num_runs = 50 + record = [] + + + for i in range(0,num_individuals): - for item,row in event_chains.iterrows(): - value = event_chains.loc[item,(0, 0)] - if value !='': - print('') - print(value) + individual_event_chains = extract_results( + results_folder, + module='tlo.simulation', + key='event_chains', + column=str(i), + do_scaling=False + ) + + #print(individual_event_chains) + + + for r in range(0,num_runs): + + print("AT RUN = ", r) + + initial_properties = {} + progression_properties = {} + key_first_event = {} + key_last_event = {} + first_event = {} + last_event = {} + properties = {} + + + #ind_Counter = Counter() + ind_Counter = {'0': Counter(), '1a': Counter(), '1b' : Counter(), '2' : Counter()} + # Count total appts + + list_for_individual = [] + for item,row in individual_event_chains.iterrows(): + value = individual_event_chains.loc[item,(0, r)] + # print("The value is", value, "at run ", r) + if value !='' and isinstance(value, str): + evaluated = eval(value, eval_env) + list_for_individual.append(evaluated) + # elif not isinstance(value,str): + # print(value) + + initial_properties = list_for_individual[0] + print(initial_properties) + + # Initialise first event by gathering parameters of interest from initial_properties + first_event = {key: initial_properties[key] for key in initial_properties_of_interest if key in initial_properties} + + progression_properties = {} + for i in list_for_individual: + if 'event' in i: + print("") + print(i) + if 'RTIPolling' in i['event']: + #print("I'm in polling event") + #print(i) + + # Keep track of which properties are changed during polling events + for key,value in i.items(): + if 'rt_' in key: + initial_rt_event_properties.add(key) + + # Retain a copy of Polling event + polling_event = i.copy() + + # Update parameters of interest following RTI + key_first_event = {key: i[key] if key in i else value for key, value in first_event.items()} + + # Calculate age of individual at time of event + key_first_event['age_in_days_at_event'] = (i['rt_date_inj'] - initial_properties['date_of_birth']).days + + # Keep track of evolution in individual's properties + progression_properties = initial_properties.copy() + progression_properties.update(i) + + else: + # Progress properties of individual, even if this event is a death + progression_properties.update(i) + + #print(progression_properties) + # Update footprint + if 'appt_footprint' in i and i['appt_footprint'] != 'Counter()': + footprint = i['appt_footprint'] + if 'Counter' in footprint: + footprint = footprint[len("Counter("):-1] + apply = eval(footprint, eval_env) + ind_Counter[i['level']].update(Counter(apply)) + + if 'is_alive' in i and i['is_alive'] is False: + print("Death", i) + print("-------Total footprint", ind_Counter) + break + + + # Compute final properties of individual + key_last_event['is_alive_after_RTI'] = progression_properties['is_alive'] + key_last_event['duration_days'] = (progression_properties['event_date'] - polling_event['rt_date_inj']).days + key_last_event['rt_disability_final'] = progression_properties['rt_disability'] + key_last_event.update({'total_footprint': ind_Counter}) + + #print("-------Total footprint", ind_Counter) + #for key, value in key_first_event.items(): + # if 'rt_' in key or 'alive' in key: + # print(f"{key}: {value}") + #print(#) + #for key, value in key_last_event.items(): + #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key: + # print(f"{key}: {value}") + + #print(key_first_event) + #print(key_last_event) + print(initial_rt_event_properties) + properties = key_first_event | key_last_event + record.append(properties) + for key, value in properties.items(): + #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key: + print(f"{key}: {value}") + + df = pd.DataFrame(record) + df.to_csv("raw_data.csv", index=False) + + print(df) + print(initial_rt_event_properties) exit(-1) + #print(i) + #dict = {} #for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: # dict[i] = [] diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 6bdcd02d90..79df3f55b6 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -22,18 +22,42 @@ from tlo.methods.fullmodel import fullmodel from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario - +from tlo.methods import ( + alri, + cardio_metabolic_disorders, + care_of_women_during_pregnancy, + contraception, + demography, + depression, + diarrhoea, + enhanced_lifestyle, + epi, + healthburden, + healthseekingbehaviour, + healthsystem, + hiv, + rti, + labour, + malaria, + newborn_outcomes, + postnatal_supervisor, + pregnancy_supervisor, + stunting, + symptommanager, + tb, + wasting, +) class GenerateDataChains(BaseScenario): def __init__(self): super().__init__() self.seed = 0 self.start_date = Date(2010, 1, 1) - self.end_date = self.start_date + pd.DateOffset(months=1) - self.pop_size = 120 + self.end_date = self.start_date + pd.DateOffset(months=13) + self.pop_size = 1000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 1 + self.runs_per_draw = 50 self.generate_event_chains = True def log_configuration(self): @@ -51,10 +75,23 @@ def log_configuration(self): } def modules(self): - return ( - fullmodel(resourcefilepath=self.resources) - + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] - ) + # MODIFY + # Here instead of running full module + return [demography.Demography(resourcefilepath=self.resources), + enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources), + healthburden.HealthBurden(resourcefilepath=self.resources), + symptommanager.SymptomManager(resourcefilepath=self.resources, spurious_symptoms=False), + rti.RTI(resourcefilepath=self.resources), + healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources), + #simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + healthsystem.HealthSystem(resourcefilepath=self.resources, + mode_appt_constraints=1, + cons_availability='all')] + + # return ( + # fullmodel(resourcefilepath=self.resources) + # + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + # ) def draw_parameters(self, draw_number, rng): if draw_number < self.number_of_draws: diff --git a/src/tlo/events.py b/src/tlo/events.py index 00a6fe4e7d..ba8024f621 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,7 +11,7 @@ import pandas as pd -FACTOR_POP_DICT = 5000 +from tlo.util import FACTOR_POP_DICT logger = logging.getLogger(__name__) @@ -132,7 +132,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) - if debug_chains: + if self.sim.debug_generate_event_chains: # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target @@ -142,6 +142,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: + # This will be a population-wide event. In order to find individuals for which this led to # a meaningful change, make a copy of the pop dataframe before the event has occurred. df_before = self.sim.population.props.copy() @@ -174,7 +175,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> chain_links[self.target] = str(link_info) # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - if debug_chains: + if self.sim.debug_generate_event_chains: # Print entire row row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births row['person_ID'] = self.target @@ -194,7 +195,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> chain_links = self.compare_population_dataframe(df_before, df_after) # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - if debug_chains: + if self.sim.debug_generate_event_chains: # Or print entire rows change = df_before.compare(df_after) if not change.empty: @@ -233,7 +234,6 @@ def run(self): # Create empty logger for entire pop pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals - pop_dict.update(chain_links) # Log chain_links here diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index d657e9d3a0..bdf597fba4 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -8,10 +8,9 @@ from tlo import Date, logging from tlo.events import Event from tlo.population import Population - +from tlo.util import FACTOR_POP_DICT import pandas as pd -FACTOR_POP_DICT = 5000 if TYPE_CHECKING: @@ -219,19 +218,21 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.sim.population.props.loc[[abs(self.target)]] - row['person_ID'] = self.target - row['event'] = str(self) - row['event_date'] = self.sim.date - row['when'] = 'Before' - try: - row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) - row['level'] = self.facility_info.level - except: - row['appt_footprint'] = 'N/A' - row['level'] = 'N/A' - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + if self.sim.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + row = self.sim.population.props.loc[[abs(self.target)]] + row['person_ID'] = self.target + row['event'] = str(self) + row['event_date'] = self.sim.date + row['when'] = 'Before' + + try: + row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) + row['level'] = self.facility_info.level + except: + row['appt_footprint'] = 'N/A' + row['level'] = 'N/A' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: # Once this has been removed from Chronic Syndrome mock module, make this a Runtime Error @@ -280,15 +281,16 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> chain_links = {self.target : str(link_info)} - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.sim.population.props.loc[[abs(self.target)]] - row['person_ID'] = self.target - row['event'] = str(self) - row['event_date'] = self.sim.date - row['when'] = 'After' - row['appt_footprint'] = record_footprint - row['level'] = record_level - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + if self.sim.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + row = self.sim.population.props.loc[[abs(self.target)]] + row['person_ID'] = self.target + row['event'] = str(self) + row['event_date'] = self.sim.date + row['when'] = 'After' + row['appt_footprint'] = record_footprint + row['level'] = record_level + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) return chain_links diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 3642365976..1ca2749af7 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2776,7 +2776,7 @@ class RTIPollingEvent(RegularEvent, PopulationScopeEventMixin): def __init__(self, module): """Schedule to take place every month """ - super().__init__(module, frequency=DateOffset(months=1000)) + super().__init__(module, frequency=DateOffset(months=1000)) # Single polling event p = module.parameters # Parameters which transition the model between states self.base_1m_prob_rti = (p['base_rate_injrti'] / 12) @@ -2864,10 +2864,12 @@ def apply(self, population): .when('.between(70,79)', self.rr_injrti_age7079), Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol) ) - if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True: - pred = 1.0 - else: - pred = eq.predict(df.loc[rt_current_non_ind]) + #if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True: + pred = 1.0 + #else: + # pred = eq.predict(df.loc[rt_current_non_ind]) + + random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) selected_for_rti = rt_current_non_ind[pred > random_draw_in_rti] @@ -4852,6 +4854,7 @@ def __init__(self, module, person_id): self.treated_code = 'none' def apply(self, person_id, squeeze_factor): + self._number_of_times_this_event_has_run += 1 df = self.sim.population.props rng = self.module.rng @@ -4900,10 +4903,12 @@ def apply(self, person_id, squeeze_factor): # injury is being treated in this surgery # find untreated injury codes that are treated with major surgery relevant_codes = np.intersect1d(injuries_to_be_treated, surgically_treated_codes) + # check that the person sent here has an appropriate code(s) assert len(relevant_codes) > 0 # choose a code at random self.treated_code = rng.choice(relevant_codes) + if request_outcome: # check the people sent here hasn't died due to rti, have had their injuries diagnosed and been through # RTI_Med @@ -4990,7 +4995,9 @@ def apply(self, person_id, squeeze_factor): # ------------------------------------- Perm disability from amputation ------------------------------------ codes = ['782', '782a', '782b', '782c', '783', '882', '883', '884'] + if self.treated_code in codes: + # Track whether they are permanently disabled df.at[person_id, 'rt_perm_disability'] = True # Find the column and code where the permanent injury is stored diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index d9ba62c43a..bb766562a0 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -11,8 +11,9 @@ from typing import Optional from typing import TYPE_CHECKING, Optional import pandas as pd - +import tlo.population import numpy as np +from tlo.util import FACTOR_POP_DICT try: import dill @@ -40,8 +41,6 @@ logger_chains = logging.getLogger("tlo.methods.event") logger_chains.setLevel(logging.INFO) -FACTOR_POP_DICT = 5000 - class SimulationPreviouslyInitialisedError(Exception): """Exception raised when trying to initialise an already initialised simulation.""" @@ -113,12 +112,15 @@ def __init__( self.generate_event_chains_overwrite_epi = None self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] + self.debug_generate_event_chains = False self.end_date = None self.output_file = None self.population: Optional[Population] = None - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - self.event_chains: Optinoal[Population] = None + + if self.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + self.event_chains: Optional[Population] = None self.show_progress_bar = show_progress_bar self.resourcefilepath = resourcefilepath @@ -288,8 +290,9 @@ def make_initial_population(self, *, n: int) -> None: data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level']) + if self.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level']) # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. @@ -329,7 +332,7 @@ def initialise(self, *, end_date: Date) -> None: self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules] - self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] + self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'HealthSeekingBehaviourPoll', 'LifestyleEvent'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] else: # If not using to print chains, cannot ignore epi self.generate_event_chains_overwrite_epi = False @@ -418,8 +421,9 @@ def run_simulation_to(self, *, to_date: Date) -> None: self.fire_single_event(event, date) self.date = to_date - # TO BE REMOVED: this is currently only used for debugging, will be removed from final PR. - self.event_chains.to_csv('output.csv', index=False) + if self.debug_generate_event_chains: + # TO BE REMOVED: this is currently only used for debugging, will be removed from final PR. + self.event_chains.to_csv('output.csv', index=False) if self.show_progress_bar: progress_bar.stop() @@ -492,13 +496,14 @@ def do_birth(self, mother_id: int) -> int: data = pop_dict, description='Links forming chains of events for simulated individuals') - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.population.props.iloc[[child_id]] - row['person_ID'] = child_id - row['event'] = 'Birth' - row['event_date'] = self.date - row['when'] = 'After' - self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + if self.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + row = self.population.props.iloc[[child_id]] + row['person_ID'] = child_id + row['event'] = 'Birth' + row['event_date'] = self.date + row['when'] = 'After' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) return child_id diff --git a/src/tlo/util.py b/src/tlo/util.py index 168b1d41a1..f8dc67d471 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -12,6 +12,7 @@ # Default mother_id value, assigned to individuals initialised as adults at the start of the simulation. DEFAULT_MOTHER_ID = -1e7 +FACTOR_POP_DICT = 1000 def create_age_range_lookup(min_age: int, max_age: int, range_size: int = 5) -> (list, Dict[int, str]): From 0dd862f2a9b485a33933e185e3c59ad64ed33ed9 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 26 Nov 2024 15:28:30 +0000 Subject: [PATCH 20/54] Change label of person when iterating --- .../analysis_extract_data.py | 68 ++++++++++++------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 6eb6408830..4c8e7d8197 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -41,7 +41,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No 'nan': float('nan'), # Include NaN for eval (can also use pd.NA if preferred) } - initial_properties_of_interest = ['rt_inj_severity','rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sex','li_urban', 'li_wealth', 'li_ex_alc', 'li_exposed_to_campaign_alcohol_reduction', 'li_mar_stat', 'li_in_ed', 'li_ed_lev'] + initial_properties_of_interest = ['rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sy_severe_trauma','sex','li_urban', 'li_wealth', 'li_mar_stat', 'li_in_ed', 'li_ed_lev'] # Will be added through computation: age at time of RTI @@ -54,13 +54,15 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No record = [] - for i in range(0,num_individuals): + for p in range(0,num_individuals): + + print("At person = ", p) individual_event_chains = extract_results( results_folder, module='tlo.simulation', key='event_chains', - column=str(i), + column=str(p), do_scaling=False ) @@ -69,7 +71,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No for r in range(0,num_runs): - print("AT RUN = ", r) + initial_properties = {} progression_properties = {} @@ -78,7 +80,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No first_event = {} last_event = {} properties = {} - + average_disability = 0 + prev_disability_incurred = 0 #ind_Counter = Counter() ind_Counter = {'0': Counter(), '1a': Counter(), '1b' : Counter(), '2' : Counter()} @@ -95,7 +98,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No # print(value) initial_properties = list_for_individual[0] - print(initial_properties) + # print(initial_properties) # Initialise first event by gathering parameters of interest from initial_properties first_event = {key: initial_properties[key] for key in initial_properties_of_interest if key in initial_properties} @@ -103,8 +106,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No progression_properties = {} for i in list_for_individual: if 'event' in i: - print("") - print(i) + #print("") + #print(i) if 'RTIPolling' in i['event']: #print("I'm in polling event") #print(i) @@ -126,10 +129,26 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No # Keep track of evolution in individual's properties progression_properties = initial_properties.copy() progression_properties.update(i) + + # dalys incurred + if 'rt_disability' in i: + prev_disability_incurred = i['rt_disability'] + prev_date = i['event_date'] + #print('At polling event, ', prev_disability_incurred, prev_date) else: # Progress properties of individual, even if this event is a death progression_properties.update(i) + + # If disability has changed as a result of this, recalculate + if 'rt_disability' in i and i['rt_disability'] != prev_disability_incurred: + dt_in_prev_disability = (i['event_date'] - prev_date).days + average_disability += prev_disability_incurred*dt_in_prev_disability + # Update variables + prev_disability_incurred = i['rt_disability'] + prev_date = i['event_date'] + + #print(progression_properties) # Update footprint @@ -141,34 +160,33 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No ind_Counter[i['level']].update(Counter(apply)) if 'is_alive' in i and i['is_alive'] is False: - print("Death", i) - print("-------Total footprint", ind_Counter) + #print("Death", i) + #print("-------Total footprint", ind_Counter) break # Compute final properties of individual key_last_event['is_alive_after_RTI'] = progression_properties['is_alive'] key_last_event['duration_days'] = (progression_properties['event_date'] - polling_event['rt_date_inj']).days - key_last_event['rt_disability_final'] = progression_properties['rt_disability'] + if not key_first_event['rt_imm_death'] and key_last_event['duration_days']> 0.0: + key_last_event['rt_disability_average'] = average_disability/key_last_event['duration_days'] + else: + key_last_event['rt_disability_average'] = 0.0 + key_last_event['rt_disability_permanent'] = progression_properties['rt_disability'] key_last_event.update({'total_footprint': ind_Counter}) - - #print("-------Total footprint", ind_Counter) - #for key, value in key_first_event.items(): - # if 'rt_' in key or 'alive' in key: - # print(f"{key}: {value}") - #print(#) - #for key, value in key_last_event.items(): - #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key: - # print(f"{key}: {value}") - #print(key_first_event) - #print(key_last_event) - print(initial_rt_event_properties) + #print("Average disability", key_last_event['rt_disability_average']) + properties = key_first_event | key_last_event + + if not key_first_event['rt_imm_death'] and ((properties['rt_disability_average']-properties['rt_disability'])/properties['rt_disability'] > 1e-4): + print("Error in computed average for individual ", p, r ) + record.append(properties) - for key, value in properties.items(): + #for key, value in properties.items(): #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key: - print(f"{key}: {value}") + #print(f"{key}: {value}") + # print("Initial event properties", initial_rt_event_properties) df = pd.DataFrame(record) df.to_csv("raw_data.csv", index=False) From 84f826322ba13f6fa1631d639944c2bac50667f6 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 13 Dec 2024 15:55:03 +0000 Subject: [PATCH 21/54] Correctly retrieve event name --- src/tlo/events.py | 12 ++++++------ src/tlo/methods/hsi_event.py | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index ba8024f621..f67b54458a 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -97,7 +97,7 @@ def compare_population_dataframe(self,df_before, df_after): # First add event info link_info = { 'person_ID': idx, - 'event': str(self), + 'event': type(self).__name__, 'event_date': self.sim.date, } @@ -136,7 +136,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target - row['event'] = str(self) + row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'Before' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) @@ -164,7 +164,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> link_info = { #'person_ID' : self.target, 'person_ID' : self.target, - 'event' : str(self), + 'event' : type(self).__name__, 'event_date' : self.sim.date, } # Store (if any) property changes as a result of the event for this individual @@ -179,7 +179,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> # Print entire row row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births row['person_ID'] = self.target - row['event'] = str(self) + row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'After' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) @@ -202,13 +202,13 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> indices = change.index new_rows_before = df_before.loc[indices] new_rows_before['person_ID'] = new_rows_before.index - new_rows_before['event'] = self + new_rows_before['event'] = type(self).__name__ new_rows_before['event_date'] = self.sim.date new_rows_before['when'] = 'Before' new_rows_after = df_after.loc[indices] new_rows_after['person_ID'] = new_rows_after.index - new_rows_after['event'] = self + new_rows_after['event'] = type(self).__name__ new_rows_after['event_date'] = self.sim.date new_rows_after['when'] = 'After' diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index f267181b56..978b26d7c5 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -222,7 +222,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target - row['event'] = str(self) + row['event'] = type(self).__name__ #str(self.event_name) row['event_date'] = self.sim.date row['when'] = 'Before' @@ -268,7 +268,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> link_info = { 'person_ID': self.target, - 'event' : str(self), + 'event' : type(self).__name__, 'event_date' : self.sim.date, 'appt_footprint' : record_footprint, 'level' : record_level, @@ -285,7 +285,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target - row['event'] = str(self) + row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'After' row['appt_footprint'] = record_footprint From a490d1995c12ac20beda2fbd16271d22f0e4f8fe Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 20 Jan 2025 11:34:02 +0000 Subject: [PATCH 22/54] Modify scenario file such that can exclude specific services, and corrected analysis file such as for small number of cases where the DALYs are not explicitly resolved the average DALYs are still computed correctly [skip ci] --- .../analysis_extract_data.py | 105 ++++++++++-------- .../scenario_generate_chains.py | 58 +++++++--- 2 files changed, 103 insertions(+), 60 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 4c8e7d8197..3afad7adcc 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -16,6 +16,9 @@ from collections import Counter import ast +# Time simulated to collect data +start_date = Date(2010, 1, 1) +end_date = start_date + pd.DateOffset(months=13) # Range of years considered min_year = 2010 @@ -25,6 +28,13 @@ def all_columns(_df): return pd.Series(_df.all()) +def check_if_beyond_time_range_considered(progression_properties): + matching_keys = [key for key in progression_properties.keys() if "rt_date_to_remove_daly" in key] + if matching_keys: + for key in matching_keys: + if progression_properties[key] > end_date: + print("Beyond time range considered, need at least ",progression_properties[key]) + def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ): """Produce standard set of plots describing the effect of each TREATMENT_ID. - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. @@ -44,19 +54,21 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No initial_properties_of_interest = ['rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sy_severe_trauma','sex','li_urban', 'li_wealth', 'li_mar_stat', 'li_in_ed', 'li_ed_lev'] # Will be added through computation: age at time of RTI - # Will be added through computation: total duration of event initial_rt_event_properties = set() - + num_individuals = 1000 num_runs = 50 record = [] - + # Include results folder in output file name + name_tag = str(results_folder).replace("outputs/", "") + + for p in range(0,num_individuals): - print("At person = ", p) + print("At person = ", p, " out of ", num_individuals) individual_event_chains = extract_results( results_folder, @@ -66,51 +78,41 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No do_scaling=False ) - #print(individual_event_chains) - - for r in range(0,num_runs): - - - initial_properties = {} - progression_properties = {} key_first_event = {} key_last_event = {} first_event = {} last_event = {} properties = {} average_disability = 0 + total_dt_included = 0 + dt_in_prev_disability = 0 prev_disability_incurred = 0 - - #ind_Counter = Counter() ind_Counter = {'0': Counter(), '1a': Counter(), '1b' : Counter(), '2' : Counter()} # Count total appts list_for_individual = [] for item,row in individual_event_chains.iterrows(): value = individual_event_chains.loc[item,(0, r)] - # print("The value is", value, "at run ", r) if value !='' and isinstance(value, str): evaluated = eval(value, eval_env) list_for_individual.append(evaluated) - # elif not isinstance(value,str): - # print(value) + # These are the properties of the individual before the start of the chain of events initial_properties = list_for_individual[0] - # print(initial_properties) # Initialise first event by gathering parameters of interest from initial_properties first_event = {key: initial_properties[key] for key in initial_properties_of_interest if key in initial_properties} + # The changing or adding of properties from the first_event will be stored in progression_properties progression_properties = {} + for i in list_for_individual: + # Skip the initial_properties, or in other words only consider these if they are 'proper' events if 'event' in i: - #print("") #print(i) if 'RTIPolling' in i['event']: - #print("I'm in polling event") - #print(i) # Keep track of which properties are changed during polling events for key,value in i.items(): @@ -130,67 +132,80 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No progression_properties = initial_properties.copy() progression_properties.update(i) - # dalys incurred + # Initialise chain of Dalys incurred if 'rt_disability' in i: prev_disability_incurred = i['rt_disability'] prev_date = i['event_date'] - #print('At polling event, ', prev_disability_incurred, prev_date) else: # Progress properties of individual, even if this event is a death progression_properties.update(i) - # If disability has changed as a result of this, recalculate - if 'rt_disability' in i and i['rt_disability'] != prev_disability_incurred: + # If disability has changed as a result of this, recalculate and add previous to rolling average + if 'rt_disability' in i: + dt_in_prev_disability = (i['event_date'] - prev_date).days + #print("Detected change in disability", i['rt_disability'], "after dt=", dt_in_prev_disability) + #print("Adding the following to the average", prev_disability_incurred, " x ", dt_in_prev_disability ) average_disability += prev_disability_incurred*dt_in_prev_disability + total_dt_included += dt_in_prev_disability # Update variables prev_disability_incurred = i['rt_disability'] prev_date = i['event_date'] - - - #print(progression_properties) - # Update footprint + # Update running footprint if 'appt_footprint' in i and i['appt_footprint'] != 'Counter()': footprint = i['appt_footprint'] if 'Counter' in footprint: footprint = footprint[len("Counter("):-1] apply = eval(footprint, eval_env) ind_Counter[i['level']].update(Counter(apply)) - + + # If the individual has died, ensure chain of event is interrupted here and update rolling average of DALYs if 'is_alive' in i and i['is_alive'] is False: - #print("Death", i) - #print("-------Total footprint", ind_Counter) + if ((i['event_date'] - polling_event['rt_date_inj']).days) > total_dt_included: + dt_in_prev_disability = (i['event_date'] - prev_date).days + average_disability += prev_disability_incurred*dt_in_prev_disability + total_dt_included += dt_in_prev_disability break - - + + # check_if_beyond_time_range_considered(progression_properties) + # Compute final properties of individual key_last_event['is_alive_after_RTI'] = progression_properties['is_alive'] key_last_event['duration_days'] = (progression_properties['event_date'] - polling_event['rt_date_inj']).days - if not key_first_event['rt_imm_death'] and key_last_event['duration_days']> 0.0: + + # If individual didn't die and the key_last_event didn't result in a final change in DALYs, ensure that the last change is recorded here + if not key_first_event['rt_imm_death'] and (total_dt_included < key_last_event['duration_days']): + #print("Number of events", len(list_for_individual)) + #for i in list_for_individual: + # if 'event' in i: + # print(i) + dt_in_prev_disability = (progression_properties['event_date'] - prev_date).days + average_disability += prev_disability_incurred*dt_in_prev_disability + total_dt_included += dt_in_prev_disability + + # Now calculate the average disability incurred, and store any permanent disability and total footprint + if not key_first_event['rt_imm_death'] and key_last_event['duration_days']> 0: key_last_event['rt_disability_average'] = average_disability/key_last_event['duration_days'] else: key_last_event['rt_disability_average'] = 0.0 + key_last_event['rt_disability_permanent'] = progression_properties['rt_disability'] key_last_event.update({'total_footprint': ind_Counter}) - #print("Average disability", key_last_event['rt_disability_average']) + if key_last_event['duration_days']!=total_dt_included: + print("The duration of event and total_dt_included don't match", key_last_event['duration_days'], total_dt_included) + exit(-1) properties = key_first_event | key_last_event - - if not key_first_event['rt_imm_death'] and ((properties['rt_disability_average']-properties['rt_disability'])/properties['rt_disability'] > 1e-4): - print("Error in computed average for individual ", p, r ) record.append(properties) - #for key, value in properties.items(): - #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key: - #print(f"{key}: {value}") - # print("Initial event properties", initial_rt_event_properties) - - df = pd.DataFrame(record) - df.to_csv("raw_data.csv", index=False) + + df = pd.DataFrame(record) + df.to_csv("new_raw_data_" + name_tag + ".csv", index=False) + print(df) print(initial_rt_event_properties) exit(-1) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 79df3f55b6..822bf13ad8 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -18,7 +18,7 @@ import pandas as pd from tlo import Date, logging -from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios, get_filtered_treatment_ids from tlo.methods.fullmodel import fullmodel from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario @@ -92,7 +92,35 @@ def modules(self): # fullmodel(resourcefilepath=self.resources) # + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] # ) + """ + def draw_parameters(self, draw_number, rng): + return mix_scenarios( + get_parameters_for_status_quo(), + { + 'HealthSystem': { + 'Service_Availability': list(self._scenarios.values())[draw_number], + }, + } + ) + def _get_scenarios(self) -> Dict[str, list[str]]: + Return the Dict with values for the parameter `Service_Availability` keyed by a name for the scenario. + The sequences of scenarios systematically omits one of the TREATMENT_ID's that is defined in the model. + + # Generate list of TREATMENT_IDs and filter to the resolution needed + treatments = get_filtered_treatment_ids(depth=2) + treatments_RTI = [item for item in treatments if 'Rti' in item] + + # Return 'Service_Availability' values, with scenarios for everything, nothing, and ones for which each + # treatment is omitted + service_availability = dict({"Everything": ["*", "Nothing": []}) + #service_availability.update( + # {f"No {t.replace('_*', '*')}": [x for x in treatments if x != t] for t in treatments_RTI} + #) + + return service_availability + + """ def draw_parameters(self, draw_number, rng): if draw_number < self.number_of_draws: return list(self._scenarios.values())[draw_number] @@ -107,20 +135,27 @@ def draw_parameters(self, draw_number, rng): # case 6: gfHE = 0.030, factor = 1.07326 def _get_scenarios(self) -> Dict[str, Dict]: - """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. - """ + #Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. + + treatments = get_filtered_treatment_ids(depth=2) + treatments_RTI = [item for item in treatments if 'Rti' in item] - self.YEAR_OF_CHANGE = 2019 + # Return 'Service_Availability' values, with scenarios for everything, nothing, and ones for which each + # treatment is omitted + service_availability = dict({"Everything": ["*"], "Nothing": []}) + service_availability.update( + {f"No {t.replace('_*', '*')}": [x for x in treatments if x != t] for t in treatments_RTI} + ) + print(service_availability.keys()) return { - # =========== STATUS QUO ============ "Baseline": mix_scenarios( self._baseline(), { "HealthSystem": { - "yearly_HR_scaling_mode": "no_scaling", + "Service_Availability": service_availability["No Rti_BurnManagement*"], }, } ), @@ -128,20 +163,13 @@ def _get_scenarios(self) -> Dict[str, Dict]: } def _baseline(self) -> Dict: - """Return the Dict with values for the parameter changes that define the baseline scenario. """ + #Return the Dict with values for the parameter changes that define the baseline scenario. return mix_scenarios( get_parameters_for_status_quo(), { "HealthSystem": { "mode_appt_constraints": 1, # <-- Mode 1 prior to change to preserve calibration - "mode_appt_constraints_postSwitch": 2, # <-- Mode 2 post-change to show effects of HRH - "year_mode_switch": self.YEAR_OF_CHANGE, - "scale_to_effective_capabilities": True, - "policy_name": "Naive", - "tclose_overwrite": 1, - "tclose_days_offset_overwrite": 7, - "use_funded_or_actual_staffing": "actual", - "cons_availability": "default", + "cons_availability": "all", } }, ) From 08a5d9a29c9e2e8af7832ca49bfca1cb75f6d8d6 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Sat, 12 Apr 2025 11:34:07 +0100 Subject: [PATCH 23/54] Change seed in scenario file --- .../analysis_data_generation/scenario_generate_chains.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 822bf13ad8..3bc75978d2 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -51,7 +51,7 @@ class GenerateDataChains(BaseScenario): def __init__(self): super().__init__() - self.seed = 0 + self.seed = 42 self.start_date = Date(2010, 1, 1) self.end_date = self.start_date + pd.DateOffset(months=13) self.pop_size = 1000 From 3dda343f65c49e429c677b89d1536531fa83833a Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 14 Apr 2025 18:06:08 +0200 Subject: [PATCH 24/54] latest scenario --- .../analysis_data_generation/scenario_generate_chains.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 3bc75978d2..1297c6b18b 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -155,7 +155,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: self._baseline(), { "HealthSystem": { - "Service_Availability": service_availability["No Rti_BurnManagement*"], + "Service_Availability": service_availability["No Rti_FractureCast*"], }, } ), From d9e3f66138c0e372b2b0fa0ac10e7393457bcaf8 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 29 Apr 2025 09:35:47 +0100 Subject: [PATCH 25/54] Latest scenario version --- .../analysis_data_generation/scenario_generate_chains.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 1297c6b18b..b4ad946154 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -155,7 +155,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: self._baseline(), { "HealthSystem": { - "Service_Availability": service_availability["No Rti_FractureCast*"], + "Service_Availability": service_availability["No Rti_MinorSurgeries*"], }, } ), From ddf6f689b6b9184e3f09ac1906417e6fa0495a7f Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 29 Apr 2025 15:44:41 +0100 Subject: [PATCH 26/54] Latest version of scenario file --- .../analysis_data_generation/scenario_generate_chains.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index b4ad946154..35b7d75e1c 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -155,7 +155,7 @@ def _get_scenarios(self) -> Dict[str, Dict]: self._baseline(), { "HealthSystem": { - "Service_Availability": service_availability["No Rti_MinorSurgeries*"], + "Service_Availability": service_availability["No Rti_ShockTreatment*"], }, } ), From 0e38408d5e37ccb4f894bb89c4d3c93673ae09a3 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 9 Oct 2025 09:20:35 +0100 Subject: [PATCH 27/54] Ensure changes to mni dataframe are captured as well --- .../scenario_generate_chains.py | 30 ++-- src/tlo/events.py | 164 ++++++++++++++++-- src/tlo/methods/hsi_event.py | 112 ++++++++---- src/tlo/methods/pregnancy_helper_functions.py | 50 +----- src/tlo/methods/pregnancy_supervisor.py | 50 ++++++ src/tlo/methods/rti.py | 4 +- src/tlo/simulation.py | 13 +- src/tlo/util.py | 2 +- 8 files changed, 314 insertions(+), 111 deletions(-) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 35b7d75e1c..64fa70d055 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -53,11 +53,11 @@ def __init__(self): super().__init__() self.seed = 42 self.start_date = Date(2010, 1, 1) - self.end_date = self.start_date + pd.DateOffset(months=13) + self.end_date = self.start_date + pd.DateOffset(months=36) self.pop_size = 1000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 50 + self.runs_per_draw = 1 self.generate_event_chains = True def log_configuration(self): @@ -77,21 +77,31 @@ def log_configuration(self): def modules(self): # MODIFY # Here instead of running full module + """ return [demography.Demography(resourcefilepath=self.resources), enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources), healthburden.HealthBurden(resourcefilepath=self.resources), - symptommanager.SymptomManager(resourcefilepath=self.resources, spurious_symptoms=False), - rti.RTI(resourcefilepath=self.resources), + symptommanager.SymptomManager(resourcefilepath=self.resources, spurious_symptoms=False),#, + #rti.RTI(resourcefilepath=self.resources), + pregnancy_supervisor.PregnancySupervisor(resourcefilepath=self.resources), + labour.Labour(resourcefilepath=self.resources), + care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=self.resources), + contraception.Contraception(resourcefilepath=self.resources), + newborn_outcomes.NewbornOutcomes(resourcefilepath=self.resources), + postnatal_supervisor.PostnatalSupervisor(resourcefilepath=self.resources), + hiv.Hiv(resourcefilepath=self.resources), + tb.Tb(resourcefilepath=self.resources), + epi.Epi(resourcefilepath=self.resources), healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources), - #simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + #simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), healthsystem.HealthSystem(resourcefilepath=self.resources, mode_appt_constraints=1, cons_availability='all')] - - # return ( - # fullmodel(resourcefilepath=self.resources) - # + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] - # ) + """ + return ( + fullmodel(resourcefilepath=self.resources) + + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + ) """ def draw_parameters(self, draw_number, rng): return mix_scenarios( diff --git a/src/tlo/events.py b/src/tlo/events.py index f67b54458a..3a8f4f58c7 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -13,6 +13,7 @@ from tlo.util import FACTOR_POP_DICT +import copy logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -76,23 +77,85 @@ def apply(self, target): """ raise NotImplementedError - def compare_population_dataframe(self,df_before, df_after): + def values_differ(self, v1, v2): + + if isinstance(v1, list) and isinstance(v2, list): + return v1 != v2 # simple element-wise comparison + + if pd.isna(v1) and pd.isna(v2): + return False # treat both NaT/NaN as equal + return v1 != v2 + + def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): + diffs = {} + """ + will_pause = False + + target_attribute = 'hcw_not_avail' + if len(entire_mni_after)>0: + print("Default target value before", self.sim.modules['PregnancySupervisor'].default_mni_values[target_attribute]) + person = next(iter(entire_mni_after)) + entire_mni_after[person][target_attribute] = True + will_pause = True + print("Default target value after", self.sim.modules['PregnancySupervisor'].default_mni_values[target_attribute]) + + + if will_pause: + print("Reprint") + print(entire_mni_before) + print(entire_mni_after) + print("Default target value", self.sim.modules['PregnancySupervisor'].default_mni_values[target_attribute]) + """ + all_individuals = set(entire_mni_before.keys()) | set(entire_mni_after.keys()) + + for person in all_individuals: + if person not in entire_mni_before: # but is afterward + for key in entire_mni_after[person]: + if self.values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = entire_mni_after[person][key] + + elif person not in entire_mni_after: # but is beforehand + for key in entire_mni_before[person]: + if self.values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] + + else: # person is in both + # Compare properties + for key in entire_mni_before[person]: + if self.values_differ(entire_mni_before[person][key],entire_mni_after[person][key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = entire_mni_after[person][key] + + if len(diffs)>0: + print("DIfferences for ", diffs) + return diffs + + def compare_population_dataframe(self,df_before, df_after, entire_mni_before, entire_mni_after): """ This function compares the population dataframe before/after a population-wide event has occurred. It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. """ # Create a mask of where values are different diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) + + diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) # Create an empty list to store changes for each of the individuals chain_links = {} len_of_diff = len(diff_mask) # Loop through each row of the mask + persons_changed = [] for idx, row in diff_mask.iterrows(): changed_cols = row.index[row].tolist() if changed_cols: # Proceed only if there are changes in the row + persons_changed.append(idx) # Create a dictionary for this person # First add event info link_info = { @@ -104,19 +167,47 @@ def compare_population_dataframe(self,df_before, df_after): # Store the new values from df_after for the changed columns for col in changed_cols: link_info[col] = df_after.at[idx, col] - + + if idx in diff_mni: + # This person has also undergone changes in the mni dictionary, so add these here + for key in diff_mni[idx]: + link_info[col] = diff_mni[idx][key] + # Append the event and changes to the individual key chain_links[idx] = str(link_info) - + + # Check individuals + if len(diff_mni)>0: + print("Non-zero changes in mni") + for key in diff_mni: + if key not in persons_changed: + print("Individual ", key, "is changing in mni alone") + # If individual hadn't been previously added due to changes in pop df, add it here + link_info = { + 'person_ID': key, + 'event': type(self).__name__, + 'event_date': self.sim.date, + } + + for key_prop in diff_mni[key]: + link_info[key_prop] = diff_mni[key][key_prop] + + chain_links[key] = str(link_info) + print("Change for ", key, " is ", str(link_info)) + return chain_links - def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame]: - """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ + def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame, dict, dict, bool]: + """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ + # Initialise these variables print_chains = False df_before = [] row_before = pd.Series() + mni_instances_before = False + mni_row_before = {} + entire_mni_before = {} # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore #if (self.module in self.sim.generate_event_chains_modules_of_interest) and .. @@ -129,9 +220,16 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame # Target is single individual if self.target != self.sim.population: + # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + + if self.target in mni: + mni_instances_before = True + mni_row_before = mni[self.target].copy() + if self.sim.debug_generate_event_chains: # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] @@ -139,6 +237,13 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'Before' + if not mni_instances_before: + for key in self.sim.modules['PregnancySupervisor'].default_mni_values: + row[key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] + else: + for key in mni_row_before: + row[key] = mni_row_before[key] + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: @@ -146,20 +251,30 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame # This will be a population-wide event. In order to find individuals for which this led to # a meaningful change, make a copy of the pop dataframe before the event has occurred. df_before = self.sim.population.props.copy() + entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - return print_chains, row_before, df_before + return print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before - def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> dict: + def store_chains_to_do_after_event(self, print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) -> dict: """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ chain_links = {} - + + if print_chains: # Target is single individual if self.target != self.sim.population: + + mni_instances_after = False + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + + if self.target in mni: + mni_instances_after = True + # Create and store event for this individual, regardless of whether any property change occurred link_info = { #'person_ID' : self.target, @@ -167,11 +282,35 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> 'event' : type(self).__name__, 'event_date' : self.sim.date, } + # Store (if any) property changes as a result of the event for this individual for key in row_before.index: if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] + # Now store changes in the mni dictionary, accounting for following cases: + + # Individual is in mni dictionary before and after + if mni_instances_before and mni_instances_after: + for key in mni_row_before: + if self.values_differ(mni_row_before[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Individual is only in mni dictionary before event + elif mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in mni_row_before: + if self.values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not mni_instances_before: + print("INDIVIDUAL WAS ADDED") + exit(-1) + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in default: + if self.values_differ(default[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Else, no need to do anything + chain_links[self.target] = str(link_info) # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. @@ -182,6 +321,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'After' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: @@ -190,9 +330,10 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> # Population frame after event df_after = self.sim.population.props + entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) # Create and store the event and dictionary of changes for affected individuals - chain_links = self.compare_population_dataframe(df_before, df_after) + chain_links = self.compare_population_dataframe(df_before, df_after, entire_mni_before, entire_mni_after) # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. if self.sim.debug_generate_event_chains: @@ -222,7 +363,7 @@ def run(self): # Collect relevant information before event takes place if self.sim.generate_event_chains: - print_chains, row_before, df_before = self.store_chains_to_do_before_event() + print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before = self.store_chains_to_do_before_event() self.apply(self.target) self.post_apply_hook() @@ -230,7 +371,7 @@ def run(self): # Collect event info + meaningful property changes of individuals. Combined, these will constitute a 'link' # in the individual's event chain. if self.sim.generate_event_chains: - chain_links = self.store_chains_to_do_after_event(print_chains, row_before, df_before) + chain_links = self.store_chains_to_do_after_event(print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) # Create empty logger for entire pop pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals @@ -238,6 +379,7 @@ def run(self): # Log chain_links here if len(chain_links)>0: + print(chain_links) logger_chain.info(key='event_chains', data= pop_dict, description='Links forming chains of events for simulated individuals') diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 978b26d7c5..41342f117e 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -195,65 +195,83 @@ def _run_after_hsi_event(self) -> None: item_codes=self._EQUIPMENT, facility_id=self.facility_info.id ) - - def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: + + def values_differ(self, v1, v2): + + if isinstance(v1, list) and isinstance(v2, list): + return v1 != v2 # simple element-wise comparison + + if pd.isna(v1) and pd.isna(v2): + return False # treat both NaT/NaN as equal + return v1 != v2 + + + def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, dict, bool]: """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ # Initialise these variables print_chains = False row_before = pd.Series() + mni_instances_before = False + mni_row_before = {} # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - # if (self.module in self.sim.generate_event_chains_modules_of_interest) and + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and .. if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): # Will eventually use this once I can actually GET THE NAME OF THE SELF - # if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): - + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + + print_chains = True + + # Target is single individual if self.target != self.sim.population: - # In the case of HSI events, only individual events should exist and therefore be logged - print_chains = True - # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) - + + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + + if self.target in mni: + mni_instances_before = True + mni_row_before = mni[self.target].copy() + if self.sim.debug_generate_event_chains: # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target - row['event'] = type(self).__name__ #str(self.event_name) + row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'Before' - - try: - row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) - row['level'] = self.facility_info.level - except: - row['appt_footprint'] = 'N/A' - row['level'] = 'N/A' + if not mni_instances_before: + for key in self.sim.modules['PregnancySupervisor'].default_mni_values: + row[key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] + else: + for key in mni_row_before: + row[key] = mni_row_before[key] + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: - # Once this has been removed from Chronic Syndrome mock module, make this a Runtime Error - # raise RuntimeError("Cannot have population-wide HSI events") - logger.debug( - key="message", - data=( - "Cannot have population-wide HSI events" - ), - ) - + print("ERROR: there shouldn't be pop-wide HSI event") - return print_chains, row_before + return print_chains, row_before, mni_row_before, mni_instances_before - def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> dict: + def store_chains_to_do_after_event(self, print_chains, row_before, footprint, mni_row_before, mni_instances_before) -> dict: """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ if print_chains: # For HSI event, this will only ever occur for individual events - + chain_links = {} + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) + mni_instances_after = False + + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + + if self.target in mni: + mni_instances_after = True + # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level # will be stored regardless of whether individual experienced property changes. @@ -278,8 +296,35 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> for key in row_before.index: if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] - - chain_links = {self.target : str(link_info)} + + # Now store changes in the mni dictionary, accounting for following cases: + + # Individual is in mni dictionary before and after + if mni_instances_before and mni_instances_after: + for key in mni_row_before: + if self.values_differ(mni_row_before[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + print("--------------------------------------------->",link_info[key]) + exit(-1) + + + # Individual is only in mni dictionary before event + elif mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in mni_row_before: + if self.values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + print("--------------------------------------------->",link_info[key]) + exit(-1) + # Individual is only in mni dictionary after event + elif mni_instances_after and not mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in default: + if self.values_differ(default[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + print("--------------------------------------------->",link_info[key]) + exit(-1) + chain_links[self.target] = str(link_info) if self.sim.debug_generate_event_chains: # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. @@ -300,7 +345,7 @@ def run(self, squeeze_factor): if self.sim.generate_event_chains and self.target != self.sim.population: - print_chains, row_before = self.store_chains_to_do_before_event() + print_chains, row_before, mni_row_before, mni_instances_before = self.store_chains_to_do_before_event() footprint = self.EXPECTED_APPT_FOOTPRINT @@ -315,10 +360,9 @@ def run(self, squeeze_factor): if updated_appt_footprint is not None: footprint = updated_appt_footprint - chain_links = self.store_chains_to_do_after_event(print_chains, row_before, str(footprint)) + chain_links = self.store_chains_to_do_after_event(print_chains, row_before, str(footprint), mni_row_before, mni_instances_before) if len(chain_links)>0: - pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # pop_dict = {i: '' for i in range(1000)} # Always include all possible individuals diff --git a/src/tlo/methods/pregnancy_helper_functions.py b/src/tlo/methods/pregnancy_helper_functions.py index 8f7faa0503..79483cddaa 100644 --- a/src/tlo/methods/pregnancy_helper_functions.py +++ b/src/tlo/methods/pregnancy_helper_functions.py @@ -542,55 +542,7 @@ def update_mni_dictionary(self, individual_id): if self == self.sim.modules['PregnancySupervisor']: - mni[individual_id] = {'delete_mni': False, # if True, mni deleted in report_daly_values function - 'didnt_seek_care': False, - 'cons_not_avail': False, - 'comp_not_avail': False, - 'hcw_not_avail': False, - 'ga_anc_one': 0, - 'anc_ints': [], - 'abortion_onset': pd.NaT, - 'abortion_haem_onset': pd.NaT, - 'abortion_sep_onset': pd.NaT, - 'eclampsia_onset': pd.NaT, - 'mild_mod_aph_onset': pd.NaT, - 'severe_aph_onset': pd.NaT, - 'chorio_onset': pd.NaT, - 'chorio_in_preg': False, # use in predictor in newborn linear models - 'ectopic_onset': pd.NaT, - 'ectopic_rupture_onset': pd.NaT, - 'gest_diab_onset': pd.NaT, - 'gest_diab_diagnosed_onset': pd.NaT, - 'gest_diab_resolution': pd.NaT, - 'mild_anaemia_onset': pd.NaT, - 'mild_anaemia_resolution': pd.NaT, - 'moderate_anaemia_onset': pd.NaT, - 'moderate_anaemia_resolution': pd.NaT, - 'severe_anaemia_onset': pd.NaT, - 'severe_anaemia_resolution': pd.NaT, - 'mild_anaemia_pp_onset': pd.NaT, - 'mild_anaemia_pp_resolution': pd.NaT, - 'moderate_anaemia_pp_onset': pd.NaT, - 'moderate_anaemia_pp_resolution': pd.NaT, - 'severe_anaemia_pp_onset': pd.NaT, - 'severe_anaemia_pp_resolution': pd.NaT, - 'hypertension_onset': pd.NaT, - 'hypertension_resolution': pd.NaT, - 'obstructed_labour_onset': pd.NaT, - 'sepsis_onset': pd.NaT, - 'uterine_rupture_onset': pd.NaT, - 'mild_mod_pph_onset': pd.NaT, - 'severe_pph_onset': pd.NaT, - 'secondary_pph_onset': pd.NaT, - 'vesicovaginal_fistula_onset': pd.NaT, - 'vesicovaginal_fistula_resolution': pd.NaT, - 'rectovaginal_fistula_onset': pd.NaT, - 'rectovaginal_fistula_resolution': pd.NaT, - 'test_run': False, # used by labour module when running some model tests - 'pred_syph_infect': pd.NaT, # date syphilis is predicted to onset - 'new_onset_spe': False, - 'cs_indication': 'none' - } + mni[individual_id] = self.sim.modules['PregnancySupervisor'].default_mni_values.copy() elif self == self.sim.modules['Labour']: labour_variables = {'labour_state': None, diff --git a/src/tlo/methods/pregnancy_supervisor.py b/src/tlo/methods/pregnancy_supervisor.py index 7dd8819ab6..f634d9b971 100644 --- a/src/tlo/methods/pregnancy_supervisor.py +++ b/src/tlo/methods/pregnancy_supervisor.py @@ -61,6 +61,56 @@ def __init__(self, name=None, resourcefilepath=None): # This variable will store a Bitset handler for the property ps_abortion_complications self.abortion_complications = None + + self.default_mni_values = {'delete_mni': False, # if True, mni deleted in report_daly_values function + 'didnt_seek_care': False, + 'cons_not_avail': False, + 'comp_not_avail': False, + 'hcw_not_avail': False, + 'ga_anc_one': 0, + 'anc_ints': [], + 'abortion_onset': pd.NaT, + 'abortion_haem_onset': pd.NaT, + 'abortion_sep_onset': pd.NaT, + 'eclampsia_onset': pd.NaT, + 'mild_mod_aph_onset': pd.NaT, + 'severe_aph_onset': pd.NaT, + 'chorio_onset': pd.NaT, + 'chorio_in_preg': False, # use in predictor in newborn linear models + 'ectopic_onset': pd.NaT, + 'ectopic_rupture_onset': pd.NaT, + 'gest_diab_onset': pd.NaT, + 'gest_diab_diagnosed_onset': pd.NaT, + 'gest_diab_resolution': pd.NaT, + 'mild_anaemia_onset': pd.NaT, + 'mild_anaemia_resolution': pd.NaT, + 'moderate_anaemia_onset': pd.NaT, + 'moderate_anaemia_resolution': pd.NaT, + 'severe_anaemia_onset': pd.NaT, + 'severe_anaemia_resolution': pd.NaT, + 'mild_anaemia_pp_onset': pd.NaT, + 'mild_anaemia_pp_resolution': pd.NaT, + 'moderate_anaemia_pp_onset': pd.NaT, + 'moderate_anaemia_pp_resolution': pd.NaT, + 'severe_anaemia_pp_onset': pd.NaT, + 'severe_anaemia_pp_resolution': pd.NaT, + 'hypertension_onset': pd.NaT, + 'hypertension_resolution': pd.NaT, + 'obstructed_labour_onset': pd.NaT, + 'sepsis_onset': pd.NaT, + 'uterine_rupture_onset': pd.NaT, + 'mild_mod_pph_onset': pd.NaT, + 'severe_pph_onset': pd.NaT, + 'secondary_pph_onset': pd.NaT, + 'vesicovaginal_fistula_onset': pd.NaT, + 'vesicovaginal_fistula_resolution': pd.NaT, + 'rectovaginal_fistula_onset': pd.NaT, + 'rectovaginal_fistula_resolution': pd.NaT, + 'test_run': False, # used by labour module when running some model tests + 'pred_syph_infect': pd.NaT, # date syphilis is predicted to onset + 'new_onset_spe': False, + 'cs_indication': 'none' + } INIT_DEPENDENCIES = {'Demography'} diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index c79b26314d..e772366d57 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2865,9 +2865,9 @@ def apply(self, population): Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol) ) #if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True: - pred = 1.0 + #pred = 1.0 #else: - # pred = eq.predict(df.loc[rt_current_non_ind]) + pred = eq.predict(df.loc[rt_current_non_ind]) random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index bb766562a0..045e86bdd8 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -109,7 +109,7 @@ def __init__( self.modules = OrderedDict() self.event_queue = EventQueue() self.generate_event_chains = True - self.generate_event_chains_overwrite_epi = None + self.generate_event_chains_overwrite_epi = False self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] self.debug_generate_event_chains = False @@ -299,6 +299,12 @@ def make_initial_population(self, *, n: int) -> None: if self.generate_event_chains: pop_dict = self.population.props.to_dict(orient='index') + + #if "PregnancySupervisor" in self.modules: + # print("I found it!") + # print(self.modules['PregnancySupervisor'].mother_and_newborn_info) + # exit(-1) + for key in pop_dict.keys(): pop_dict[key]['person_ID'] = key pop_dict[key] = str(pop_dict[key]) # Log as string to avoid issues around length of properties stored later @@ -329,10 +335,10 @@ def initialise(self, *, end_date: Date) -> None: #self.generate_event_chains = generate_event_chains if self.generate_event_chains: # Eventually this can be made an option - self.generate_event_chains_overwrite_epi = True + self.generate_event_chains_overwrite_epi = False # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules] - self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'HealthSeekingBehaviourPoll', 'LifestyleEvent'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] + self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'LifestyleEvent', 'TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler', 'RTIPollingEvent'] else: # If not using to print chains, cannot ignore epi self.generate_event_chains_overwrite_epi = False @@ -491,7 +497,6 @@ def do_birth(self, mother_id: int) -> int: pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals pop_dict[child_id] = str(prop_dict) # Convert to string to avoid issue of length - print("Length at birth", len(pop_dict)) logger.info(key='event_chains', data = pop_dict, description='Links forming chains of events for simulated individuals') diff --git a/src/tlo/util.py b/src/tlo/util.py index e246fcf05b..c9130e3f07 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -13,7 +13,7 @@ # Default mother_id value, assigned to individuals initialised as adults at the start of the simulation. DEFAULT_MOTHER_ID = -1e7 -FACTOR_POP_DICT = 1000 +FACTOR_POP_DICT = 50000 def create_age_range_lookup(min_age: int, max_age: int, range_size: int = 5) -> (list, Dict[int, str]): From 9b8f01ff383bdb0954146b93849c6c7a18008b2d Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 9 Oct 2025 11:24:39 +0100 Subject: [PATCH 28/54] Tidy up --- .../analysis_extract_data.py | 2 +- src/tlo/events.py | 199 +++++++----------- src/tlo/methods/hiv.py | 32 ++- src/tlo/methods/hsi_event.py | 165 ++++++--------- src/tlo/methods/tb.py | 5 +- src/tlo/simulation.py | 41 +--- 6 files changed, 151 insertions(+), 293 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 3afad7adcc..8068db203a 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -59,7 +59,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No initial_rt_event_properties = set() num_individuals = 1000 - num_runs = 50 + num_runs = 1 record = [] # Include results folder in output file name name_tag = str(results_folder).replace("outputs/", "") diff --git a/src/tlo/events.py b/src/tlo/events.py index 3a8f4f58c7..9f762fd3c6 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -77,7 +77,7 @@ def apply(self, target): """ raise NotImplementedError - def values_differ(self, v1, v2): + def mni_values_differ(self, v1, v2): if isinstance(v1, list) and isinstance(v2, list): return v1 != v2 # simple element-wise comparison @@ -111,14 +111,14 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): for person in all_individuals: if person not in entire_mni_before: # but is afterward for key in entire_mni_after[person]: - if self.values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): + if self.mni_values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): if person not in diffs: diffs[person] = {} diffs[person][key] = entire_mni_after[person][key] elif person not in entire_mni_after: # but is beforehand for key in entire_mni_before[person]: - if self.values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): + if self.mni_values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): if person not in diffs: diffs[person] = {} diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] @@ -126,7 +126,7 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): else: # person is in both # Compare properties for key in entire_mni_before[person]: - if self.values_differ(entire_mni_before[person][key],entire_mni_after[person][key]): + if self.mni_values_differ(entire_mni_before[person][key],entire_mni_after[person][key]): if person not in diffs: diffs[person] = {} diffs[person][key] = entire_mni_after[person][key] @@ -135,13 +135,12 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): print("DIfferences for ", diffs) return diffs - def compare_population_dataframe(self,df_before, df_after, entire_mni_before, entire_mni_after): - """ This function compares the population dataframe before/after a population-wide event has occurred. + def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after): + """ This function compares the population dataframe and mni dictionary before/after a population-wide event has occurred. It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. """ # Create a mask of where values are different diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) - diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) # Create an empty list to store changes for each of the individuals @@ -176,12 +175,10 @@ def compare_population_dataframe(self,df_before, df_after, entire_mni_before, en # Append the event and changes to the individual key chain_links[idx] = str(link_info) - # Check individuals + # For individuals which only underwent changes in mni dictionary, save changes here if len(diff_mni)>0: - print("Non-zero changes in mni") for key in diff_mni: if key not in persons_changed: - print("Individual ", key, "is changing in mni alone") # If individual hadn't been previously added due to changes in pop df, add it here link_info = { 'person_ID': key, @@ -193,7 +190,6 @@ def compare_population_dataframe(self,df_before, df_after, entire_mni_before, en link_info[key_prop] = diff_mni[key][key_prop] chain_links[key] = str(link_info) - print("Change for ", key, " is ", str(link_info)) return chain_links @@ -210,7 +206,6 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame entire_mni_before = {} # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and .. if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): # Will eventually use this once I can actually GET THE NAME OF THE SELF @@ -224,140 +219,88 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + # Check if individual is already in mni dictionary, if so copy her original status mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: mni_instances_before = True mni_row_before = mni[self.target].copy() - - if self.sim.debug_generate_event_chains: - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.sim.population.props.loc[[abs(self.target)]] - row['person_ID'] = self.target - row['event'] = type(self).__name__ - row['event_date'] = self.sim.date - row['when'] = 'Before' - if not mni_instances_before: - for key in self.sim.modules['PregnancySupervisor'].default_mni_values: - row[key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] - else: - for key in mni_row_before: - row[key] = mni_row_before[key] - - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: # This will be a population-wide event. In order to find individuals for which this led to - # a meaningful change, make a copy of the pop dataframe before the event has occurred. + # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. df_before = self.sim.population.props.copy() entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) return print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before - def store_chains_to_do_after_event(self, print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) -> dict: + def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) -> dict: """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ chain_links = {} - - - if print_chains: - - # Target is single individual - if self.target != self.sim.population: + + # Target is single individual + if self.target != self.sim.population: + + # Copy full new status for individual + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) - mni_instances_after = False + # Check if individual is in mni after the event + mni_instances_after = False + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if self.target in mni: + mni_instances_after = True - row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) - - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - - if self.target in mni: - mni_instances_after = True - - # Create and store event for this individual, regardless of whether any property change occurred - link_info = { - #'person_ID' : self.target, - 'person_ID' : self.target, - 'event' : type(self).__name__, - 'event_date' : self.sim.date, - } - - # Store (if any) property changes as a result of the event for this individual - for key in row_before.index: - if row_before[key] != row_after[key]: # Note: used fillna previously - link_info[key] = row_after[key] - - # Now store changes in the mni dictionary, accounting for following cases: - - # Individual is in mni dictionary before and after - if mni_instances_before and mni_instances_after: - for key in mni_row_before: - if self.values_differ(mni_row_before[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Individual is only in mni dictionary before event - elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_mni_values - for key in mni_row_before: - if self.values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not mni_instances_before: - print("INDIVIDUAL WAS ADDED") - exit(-1) - default = self.sim.modules['PregnancySupervisor'].default_mni_values - for key in default: - if self.values_differ(default[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Else, no need to do anything - - chain_links[self.target] = str(link_info) - - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - if self.sim.debug_generate_event_chains: - # Print entire row - row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births - row['person_ID'] = self.target - row['event'] = type(self).__name__ - row['event_date'] = self.sim.date - row['when'] = 'After' - - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) - - else: - # Target is entire population. Identify individuals for which properties have changed - # and store their changes. - - # Population frame after event - df_after = self.sim.population.props - entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - - # Create and store the event and dictionary of changes for affected individuals - chain_links = self.compare_population_dataframe(df_before, df_after, entire_mni_before, entire_mni_after) - - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - if self.sim.debug_generate_event_chains: - # Or print entire rows - change = df_before.compare(df_after) - if not change.empty: - indices = change.index - new_rows_before = df_before.loc[indices] - new_rows_before['person_ID'] = new_rows_before.index - new_rows_before['event'] = type(self).__name__ - new_rows_before['event_date'] = self.sim.date - new_rows_before['when'] = 'Before' - - new_rows_after = df_after.loc[indices] - new_rows_after['person_ID'] = new_rows_after.index - new_rows_after['event'] = type(self).__name__ - new_rows_after['event_date'] = self.sim.date - new_rows_after['when'] = 'After' - - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) + # Create and store event for this individual, regardless of whether any property change occurred + link_info = { + #'person_ID' : self.target, + 'person_ID' : self.target, + 'event' : type(self).__name__, + 'event_date' : self.sim.date, + } + + # Store (if any) property changes as a result of the event for this individual + for key in row_before.index: + if row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe + link_info[key] = row_after[key] + + # Now check and store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if mni_instances_before and mni_instances_after: + for key in mni_row_before: + if self.mni_values_differ(mni_row_before[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Individual is only in mni dictionary before event + elif mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in mni_row_before: + if self.mni_values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in default: + if self.mni_values_differ(default[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Else, no need to do anything + + # Add individual to the chain links + chain_links[self.target] = str(link_info) + + else: + # Target is entire population. Identify individuals for which properties have changed + # and store their changes. + + # Population frame after event + df_after = self.sim.population.props + entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + + # Create and store the event and dictionary of changes for affected individuals + chain_links = self.compare_population_dataframe_and_mni(df_before, df_after, entire_mni_before, entire_mni_after) return chain_links + def run(self): """Make the event happen.""" @@ -370,8 +313,8 @@ def run(self): # Collect event info + meaningful property changes of individuals. Combined, these will constitute a 'link' # in the individual's event chain. - if self.sim.generate_event_chains: - chain_links = self.store_chains_to_do_after_event(print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) + if self.sim.generate_event_chains and print_chains: + chain_links = self.store_chains_to_do_after_event(row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) # Create empty logger for entire pop pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals @@ -384,7 +327,7 @@ def run(self): data= pop_dict, description='Links forming chains of events for simulated individuals') - #print("Chain events ", chain_links) + print("Chain events ", chain_links) class RegularEvent(Event): diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index 8487eaa467..0a80f8b41b 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -631,12 +631,11 @@ def initialise_population(self, population): df.loc[df.is_alive, "hv_date_treated"] = pd.NaT df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT - if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None or self.sim.generate_event_chains_overwrite_epi is False: - # Launch sub-routines for allocating the right number of people into each category - self.initialise_baseline_prevalence(population) # allocate baseline prevalence + # Launch sub-routines for allocating the right number of people into each category + self.initialise_baseline_prevalence(population) # allocate baseline prevalence - self.initialise_baseline_art(population) # allocate baseline art coverage - self.initialise_baseline_tested(population) # allocate baseline testing coverage + self.initialise_baseline_art(population) # allocate baseline art coverage + self.initialise_baseline_tested(population) # allocate baseline testing coverage def initialise_baseline_prevalence(self, population): """ @@ -906,16 +905,10 @@ def initialise_simulation(self, sim): df = sim.population.props p = self.parameters - if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi: - print("Should be generating data") - sim.schedule_event( - HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0) - ) - else: - # 1) Schedule the Main HIV Regular Polling Event - sim.schedule_event( - HivRegularPollingEvent(self), sim.date + DateOffset(days=0) - ) + # 1) Schedule the Main HIV Regular Polling Event + sim.schedule_event( + HivRegularPollingEvent(self), sim.date + DateOffset(days=0) + ) # 2) Schedule the Logging Event sim.schedule_event(HivLoggingEvent(self), sim.date + DateOffset(years=1)) @@ -1901,12 +1894,11 @@ def vmmc_for_child(): priority=0, ) - if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None or self.sim.generate_event_chains_overwrite_epi is False: - # Horizontal transmission: Male --> Female - horizontal_transmission(from_sex="M", to_sex="F") + # Horizontal transmission: Male --> Female + horizontal_transmission(from_sex="M", to_sex="F") - # Horizontal transmission: Female --> Male - horizontal_transmission(from_sex="F", to_sex="M") + # Horizontal transmission: Female --> Male + horizontal_transmission(from_sex="F", to_sex="M") # testing # if year later than 2020, set testing rates to those reported in 2020 diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 41342f117e..dbca98da5c 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -216,8 +216,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, dict, bool]: mni_row_before = {} # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and .. - if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): # Will eventually use this once I can actually GET THE NAME OF THE SELF #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): @@ -230,112 +229,75 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, dict, bool]: # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + # Check if individual is in mni dictionary before the event, if so store its original status mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: mni_instances_before = True mni_row_before = mni[self.target].copy() - - if self.sim.debug_generate_event_chains: - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.sim.population.props.loc[[abs(self.target)]] - row['person_ID'] = self.target - row['event'] = type(self).__name__ - row['event_date'] = self.sim.date - row['when'] = 'Before' - if not mni_instances_before: - for key in self.sim.modules['PregnancySupervisor'].default_mni_values: - row[key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] - else: - for key in mni_row_before: - row[key] = mni_row_before[key] - - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: print("ERROR: there shouldn't be pop-wide HSI event") + exit(-1) return print_chains, row_before, mni_row_before, mni_instances_before - def store_chains_to_do_after_event(self, print_chains, row_before, footprint, mni_row_before, mni_instances_before) -> dict: + def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, mni_instances_before) -> dict: """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ - if print_chains: - # For HSI event, this will only ever occur for individual events - chain_links = {} - row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) - - mni_instances_after = False - - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - - if self.target in mni: - mni_instances_after = True - - # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level - # will be stored regardless of whether individual experienced property changes. + # For HSI event, this will only ever occur for individual events + chain_links = {} - # Add event details + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) + + mni_instances_after = False + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if self.target in mni: + mni_instances_after = True - try: - record_footprint = str(footprint) - record_level = self.facility_info.level - except: - record_footprint = 'N/A' - record_level = 'N/A' - - link_info = { - 'person_ID': self.target, - 'event' : type(self).__name__, - 'event_date' : self.sim.date, - 'appt_footprint' : record_footprint, - 'level' : record_level, - } + # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level + # will be stored regardless of whether individual experienced property changes or not. + + # Add event details + try: + record_footprint = str(footprint) + record_level = self.facility_info.level + except: + record_footprint = 'N/A' + record_level = 'N/A' - # Add changes to properties - for key in row_before.index: - if row_before[key] != row_after[key]: # Note: used fillna previously - link_info[key] = row_after[key] - - # Now store changes in the mni dictionary, accounting for following cases: + link_info = { + 'person_ID': self.target, + 'event' : type(self).__name__, + 'event_date' : self.sim.date, + 'appt_footprint' : record_footprint, + 'level' : record_level, + } + + # Add changes to properties + for key in row_before.index: + if row_before[key] != row_after[key]: # Note: used fillna previously + link_info[key] = row_after[key] - # Individual is in mni dictionary before and after - if mni_instances_before and mni_instances_after: - for key in mni_row_before: - if self.values_differ(mni_row_before[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - print("--------------------------------------------->",link_info[key]) - exit(-1) - - - # Individual is only in mni dictionary before event - elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_mni_values - for key in mni_row_before: - if self.values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - print("--------------------------------------------->",link_info[key]) - exit(-1) - # Individual is only in mni dictionary after event - elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_mni_values - for key in default: - if self.values_differ(default[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - print("--------------------------------------------->",link_info[key]) - exit(-1) - chain_links[self.target] = str(link_info) - - if self.sim.debug_generate_event_chains: - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.sim.population.props.loc[[abs(self.target)]] - row['person_ID'] = self.target - row['event'] = type(self).__name__ - row['event_date'] = self.sim.date - row['when'] = 'After' - row['appt_footprint'] = record_footprint - row['level'] = record_level - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + # Now store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if mni_instances_before and mni_instances_after: + for key in mni_row_before: + if self.values_differ(mni_row_before[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Individual is only in mni dictionary before event + elif mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in mni_row_before: + if self.values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_mni_values + for key in default: + if self.values_differ(default[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + + chain_links[self.target] = str(link_info) return chain_links @@ -360,17 +322,16 @@ def run(self, squeeze_factor): if updated_appt_footprint is not None: footprint = updated_appt_footprint - chain_links = self.store_chains_to_do_after_event(print_chains, row_before, str(footprint), mni_row_before, mni_instances_before) + if print_chains: + chain_links = self.store_chains_to_do_after_event(row_before, str(footprint), mni_row_before, mni_instances_before) - if len(chain_links)>0: - pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} - # pop_dict = {i: '' for i in range(1000)} # Always include all possible individuals - - pop_dict.update(chain_links) - - logger_chains.info(key='event_chains', - data = pop_dict, - description='Links forming chains of events for simulated individuals') + if len(chain_links)>0: + pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} + pop_dict.update(chain_links) + + logger_chains.info(key='event_chains', + data = pop_dict, + description='Links forming chains of events for simulated individuals') return updated_appt_footprint diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 33edeb63c8..fe5d19c964 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -890,10 +890,7 @@ def initialise_simulation(self, sim): sim.schedule_event(TbRegularEvents(self), sim.date) sim.schedule_event(TbSelfCureEvent(self), sim.date) - if sim.generate_event_chains is True and sim.generate_event_chains_overwrite_epi is True: - sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0)) - else: - sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) + sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) # 2) log at the end of the year diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 045e86bdd8..8356424901 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -109,19 +109,13 @@ def __init__( self.modules = OrderedDict() self.event_queue = EventQueue() self.generate_event_chains = True - self.generate_event_chains_overwrite_epi = False self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] - self.debug_generate_event_chains = False self.end_date = None self.output_file = None self.population: Optional[Population] = None - - if self.debug_generate_event_chains: - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - self.event_chains: Optional[Population] = None - + self.show_progress_bar = show_progress_bar self.resourcefilepath = resourcefilepath @@ -289,21 +283,12 @@ def make_initial_population(self, *, n: int) -> None: key="debug", data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - - if self.debug_generate_event_chains: - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level']) # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. if self.generate_event_chains: pop_dict = self.population.props.to_dict(orient='index') - - #if "PregnancySupervisor" in self.modules: - # print("I found it!") - # print(self.modules['PregnancySupervisor'].mother_and_newborn_info) - # exit(-1) for key in pop_dict.keys(): pop_dict[key]['person_ID'] = key @@ -311,12 +296,11 @@ def make_initial_population(self, *, n: int) -> None: pop_dict_full = {i: '' for i in range(FACTOR_POP_DICT)} pop_dict_full.update(pop_dict) - - print("Size for full sim", len(pop_dict_full)) logger.info(key='event_chains', data = pop_dict_full, description='Links forming chains of events for simulated individuals') + end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") @@ -334,15 +318,9 @@ def initialise(self, *, end_date: Date) -> None: #self.generate_event_chains = generate_event_chains if self.generate_event_chains: - # Eventually this can be made an option - self.generate_event_chains_overwrite_epi = False # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules] - self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'LifestyleEvent', 'TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler', 'RTIPollingEvent'] - else: - # If not using to print chains, cannot ignore epi - self.generate_event_chains_overwrite_epi = False - + self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'LifestyleEvent', 'TbActiveCasePollGenerateData','HivPollingEventForDataGeneration', 'RTIPollingEvent'] # Reorder columns to place the new columns at the front pd.set_option('display.max_columns', None) @@ -426,10 +404,6 @@ def run_simulation_to(self, *, to_date: Date) -> None: self._update_progress_bar(progress_bar, date) self.fire_single_event(event, date) self.date = to_date - - if self.debug_generate_event_chains: - # TO BE REMOVED: this is currently only used for debugging, will be removed from final PR. - self.event_chains.to_csv('output.csv', index=False) if self.show_progress_bar: progress_bar.stop() @@ -500,15 +474,6 @@ def do_birth(self, mother_id: int) -> int: logger.info(key='event_chains', data = pop_dict, description='Links forming chains of events for simulated individuals') - - if self.debug_generate_event_chains: - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.population.props.iloc[[child_id]] - row['person_ID'] = child_id - row['event'] = 'Birth' - row['event_date'] = self.date - row['when'] = 'After' - self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) return child_id From 3b81de6546cb498938ff9918c852e39369b29ca3 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 9 Oct 2025 13:32:50 +0100 Subject: [PATCH 29/54] All fixes made --- .../analysis_extract_data.py | 8 +++- .../scenario_generate_chains.py | 2 +- src/tlo/events.py | 33 +++---------- src/tlo/methods/hsi_event.py | 4 +- src/tlo/methods/pregnancy_helper_functions.py | 46 ++++--------------- src/tlo/methods/pregnancy_supervisor.py | 40 ++++++++++++++++ 6 files changed, 64 insertions(+), 69 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 8068db203a..7fe15f0eb4 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -98,7 +98,11 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No if value !='' and isinstance(value, str): evaluated = eval(value, eval_env) list_for_individual.append(evaluated) - + + for i in list_for_individual: + print(i) + + """ # These are the properties of the individual before the start of the chain of events initial_properties = list_for_individual[0] @@ -201,7 +205,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No properties = key_first_event | key_last_event record.append(properties) - + """ df = pd.DataFrame(record) df.to_csv("new_raw_data_" + name_tag + ".csv", index=False) diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 64fa70d055..e9291a50ce 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -53,7 +53,7 @@ def __init__(self): super().__init__() self.seed = 42 self.start_date = Date(2010, 1, 1) - self.end_date = self.start_date + pd.DateOffset(months=36) + self.end_date = self.start_date + pd.DateOffset(months=18) self.pop_size = 1000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) diff --git a/src/tlo/events.py b/src/tlo/events.py index 9f762fd3c6..993c27090c 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -88,40 +88,23 @@ def mni_values_differ(self, v1, v2): def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): diffs = {} - """ - will_pause = False - - target_attribute = 'hcw_not_avail' - if len(entire_mni_after)>0: - print("Default target value before", self.sim.modules['PregnancySupervisor'].default_mni_values[target_attribute]) - person = next(iter(entire_mni_after)) - entire_mni_after[person][target_attribute] = True - will_pause = True - print("Default target value after", self.sim.modules['PregnancySupervisor'].default_mni_values[target_attribute]) - - if will_pause: - print("Reprint") - print(entire_mni_before) - print(entire_mni_after) - print("Default target value", self.sim.modules['PregnancySupervisor'].default_mni_values[target_attribute]) - """ all_individuals = set(entire_mni_before.keys()) | set(entire_mni_after.keys()) for person in all_individuals: if person not in entire_mni_before: # but is afterward for key in entire_mni_after[person]: - if self.mni_values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): + if self.mni_values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): if person not in diffs: diffs[person] = {} diffs[person][key] = entire_mni_after[person][key] elif person not in entire_mni_after: # but is beforehand for key in entire_mni_before[person]: - if self.mni_values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_mni_values[key]): + if self.mni_values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): if person not in diffs: diffs[person] = {} - diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_mni_values[key] + diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_all_mni_values[key] else: # person is in both # Compare properties @@ -131,8 +114,6 @@ def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): diffs[person] = {} diffs[person][key] = entire_mni_after[person][key] - if len(diffs)>0: - print("DIfferences for ", diffs) return diffs def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after): @@ -272,13 +253,13 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, link_info[key] = mni[self.target][key] # Individual is only in mni dictionary before event elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_mni_values + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values for key in mni_row_before: if self.mni_values_differ(mni_row_before[key], default[key]): link_info[key] = default[key] # Individual is only in mni dictionary after event elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_mni_values + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values for key in default: if self.mni_values_differ(default[key], mni[self.target][key]): link_info[key] = mni[self.target][key] @@ -322,12 +303,10 @@ def run(self): # Log chain_links here if len(chain_links)>0: - print(chain_links) + logger_chain.info(key='event_chains', data= pop_dict, description='Links forming chains of events for simulated individuals') - - print("Chain events ", chain_links) class RegularEvent(Event): diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index dbca98da5c..85ac6da3e2 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -286,13 +286,13 @@ def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, link_info[key] = mni[self.target][key] # Individual is only in mni dictionary before event elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_mni_values + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values for key in mni_row_before: if self.values_differ(mni_row_before[key], default[key]): link_info[key] = default[key] # Individual is only in mni dictionary after event elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_mni_values + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values for key in default: if self.values_differ(default[key], mni[self.target][key]): link_info[key] = mni[self.target][key] diff --git a/src/tlo/methods/pregnancy_helper_functions.py b/src/tlo/methods/pregnancy_helper_functions.py index 79483cddaa..2456f57e8b 100644 --- a/src/tlo/methods/pregnancy_helper_functions.py +++ b/src/tlo/methods/pregnancy_helper_functions.py @@ -545,40 +545,12 @@ def update_mni_dictionary(self, individual_id): mni[individual_id] = self.sim.modules['PregnancySupervisor'].default_mni_values.copy() elif self == self.sim.modules['Labour']: - labour_variables = {'labour_state': None, - # Term Labour (TL), Early Preterm (EPTL), Late Preterm (LPTL) or Post Term (POTL) - 'birth_weight': 'normal_birth_weight', - 'birth_size': 'average_for_gestational_age', - 'delivery_setting': None, # home_birth, health_centre, hospital - 'twins': df.at[individual_id, 'ps_multiple_pregnancy'], - 'twin_count': 0, - 'twin_one_comps': False, - 'pnc_twin_one': 'none', - 'bf_status_twin_one': 'none', - 'eibf_status_twin_one': False, - 'an_placental_abruption': df.at[individual_id, 'ps_placental_abruption'], - 'corticosteroids_given': False, - 'clean_birth_practices': False, - 'abx_for_prom_given': False, - 'abx_for_pprom_given': False, - 'endo_pp': False, - 'retained_placenta': False, - 'uterine_atony': False, - 'amtsl_given': False, - 'cpd': False, - 'mode_of_delivery': 'vaginal_delivery', - 'neo_will_receive_resus_if_needed': False, - # vaginal_delivery, instrumental, caesarean_section - 'hsi_cant_run': False, # True (T) or False (F) - 'sought_care_for_complication': False, # True (T) or False (F) - 'sought_care_labour_phase': 'none', - 'referred_for_cs': False, # True (T) or False (F) - 'referred_for_blood': False, # True (T) or False (F) - 'received_blood_transfusion': False, # True (T) or False (F) - 'referred_for_surgery': False, # True (T) or False (F)' - 'death_in_labour': False, # True (T) or False (F) - 'single_twin_still_birth': False, # True (T) or False (F) - 'will_receive_pnc': 'none', - 'passed_through_week_one': False} - - mni[individual_id].update(labour_variables) + + labour_default = self.sim.modules['PregnancySupervisor'].default_labour_values.copy() + mni[individual_id].update(labour_default) + + # Update from default based on individual case + mni[individual_id]['twins'] = df.at[individual_id, 'ps_multiple_pregnancy'] + mni[individual_id]['an_placental_abruption'] = df.at[individual_id, 'ps_placental_abruption'] + + diff --git a/src/tlo/methods/pregnancy_supervisor.py b/src/tlo/methods/pregnancy_supervisor.py index f634d9b971..5d747d44c2 100644 --- a/src/tlo/methods/pregnancy_supervisor.py +++ b/src/tlo/methods/pregnancy_supervisor.py @@ -82,6 +82,8 @@ def __init__(self, name=None, resourcefilepath=None): 'gest_diab_onset': pd.NaT, 'gest_diab_diagnosed_onset': pd.NaT, 'gest_diab_resolution': pd.NaT, + 'none_anaemia_onset': pd.NaT, + 'none_anaemia_resolution': pd.NaT, 'mild_anaemia_onset': pd.NaT, 'mild_anaemia_resolution': pd.NaT, 'moderate_anaemia_onset': pd.NaT, @@ -111,6 +113,44 @@ def __init__(self, name=None, resourcefilepath=None): 'new_onset_spe': False, 'cs_indication': 'none' } + self.default_labour_values = {'labour_state': None, + # Term Labour (TL), Early Preterm (EPTL), Late Preterm (LPTL) or Post Term (POTL) + 'birth_weight': 'normal_birth_weight', + 'birth_size': 'average_for_gestational_age', + 'delivery_setting': None, # home_birth, health_centre, hospital + 'twins': None, + 'twin_count': 0, + 'twin_one_comps': False, + 'pnc_twin_one': 'none', + 'bf_status_twin_one': 'none', + 'eibf_status_twin_one': False, + 'an_placental_abruption': None, + 'corticosteroids_given': False, + 'clean_birth_practices': False, + 'abx_for_prom_given': False, + 'abx_for_pprom_given': False, + 'endo_pp': False, + 'retained_placenta': False, + 'uterine_atony': False, + 'amtsl_given': False, + 'cpd': False, + 'mode_of_delivery': 'vaginal_delivery', + 'neo_will_receive_resus_if_needed': False, + # vaginal_delivery, instrumental, caesarean_section + 'hsi_cant_run': False, # True (T) or False (F) + 'sought_care_for_complication': False, # True (T) or False (F) + 'sought_care_labour_phase': 'none', + 'referred_for_cs': False, # True (T) or False (F) + 'referred_for_blood': False, # True (T) or False (F) + 'received_blood_transfusion': False, # True (T) or False (F) + 'referred_for_surgery': False, # True (T) or False (F)' + 'death_in_labour': False, # True (T) or False (F) + 'single_twin_still_birth': False, # True (T) or False (F) + 'will_receive_pnc': 'none', + 'passed_through_week_one': False} + + self.default_all_mni_values = self.default_mni_values + self.default_all_mni_values.update(self.default_labour_values) INIT_DEPENDENCIES = {'Demography'} From bc61e1efbf7c79c4b85273b5b3c893c0030b362d Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 17 Nov 2025 10:21:22 +0000 Subject: [PATCH 30/54] Cleaned and [skip ci] --- src/tlo/simulation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 8356424901..ef2fe4518e 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -108,9 +108,11 @@ def __init__( self.date = self.start_date = start_date self.modules = OrderedDict() self.event_queue = EventQueue() + self.generate_event_chains = True self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] + self.end_date = None self.output_file = None self.population: Optional[Population] = None From e084e3949c03a8e19bc49f42aea56a154d09dabf Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 20 Nov 2025 14:38:07 +0000 Subject: [PATCH 31/54] Start logging data in EAV format --- src/tlo/events.py | 17 ++++++++++------- src/tlo/simulation.py | 11 ++++++++++- src/tlo/util.py | 23 +++++++++++++++++++++++ 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 993c27090c..9e9865cdad 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,7 +11,7 @@ import pandas as pd -from tlo.util import FACTOR_POP_DICT +from tlo.util import FACTOR_POP_DICT, convert_dict_into_eav import copy @@ -233,12 +233,12 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, mni_instances_after = True # Create and store event for this individual, regardless of whether any property change occurred - link_info = { - #'person_ID' : self.target, - 'person_ID' : self.target, - 'event' : type(self).__name__, - 'event_date' : self.sim.date, - } + link_info = {} + # #'person_ID' : self.target, + # 'person_ID' : self.target, + # 'event' : type(self).__name__, + # 'event_date' : self.sim.date, + #} # Store (if any) property changes as a result of the event for this individual for key in row_before.index: @@ -265,6 +265,9 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, link_info[key] = mni[self.target][key] # Else, no need to do anything + eav = convert_dict_into_eav(link_info, self.target, self.sim.date, type(self).__name__) + print(eav) + exit(-1) # Add individual to the chain links chain_links[self.target] = str(link_info) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index ef2fe4518e..ef27fa6381 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -13,7 +13,7 @@ import pandas as pd import tlo.population import numpy as np -from tlo.util import FACTOR_POP_DICT +from tlo.util import FACTOR_POP_DICT, df_to_eav try: import dill @@ -290,6 +290,11 @@ def make_initial_population(self, *, n: int) -> None: # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. if self.generate_event_chains: + print(len(self.population.props), n) + # EAV structure to capture status of individuals at the start of the simulation + eav = df_to_eav(self.population.props, self.date, 'StartOfSimulation') + + """ pop_dict = self.population.props.to_dict(orient='index') for key in pop_dict.keys(): @@ -302,6 +307,10 @@ def make_initial_population(self, *, n: int) -> None: logger.info(key='event_chains', data = pop_dict_full, description='Links forming chains of events for simulated individuals') + """ + logger.info(key='event_chains', + data = eav.to_dict(), + description='Links forming chains of events for simulated individuals') end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") diff --git a/src/tlo/util.py b/src/tlo/util.py index c9130e3f07..e83e19baab 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -94,6 +94,29 @@ def transition_states(initial_series: pd.Series, prob_matrix: pd.DataFrame, rng: return final_states +def df_to_eav(df, date, event_name): + """Function to convert dataframe into EAV""" + eav = df.stack().reset_index() + eav.columns = ['E', 'A', 'V'] + eav['Date'] = date + eav['NameEvent'] = event_name + eav = eav[["E", "Date", "NameEvent", "A", "V"]] + + return eav + + +def convert_dict_into_eav(link_info, target, date, event_name): + "Function to convert link info in the form of dictionary into an EAV" + eav = pd.DataFrame(list(link_info.items()), columns=['A', 'V']) + eav.columns = ['A', 'V'] + eav['E'] = target + eav['Date'] = date + eav['NameEvent'] = event_name + eav = eav[['E', 'Date', 'NameEvent', 'A', 'V']] + + return eav + + def sample_outcome(probs: pd.DataFrame, rng: np.random.RandomState): """ Helper function to randomly sample an outcome for each individual in a population from a set of probabilities that are specific to each individual. From ac617e80ff416976229b3f3bdd915198a26da96c Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 21 Nov 2025 13:15:07 +0000 Subject: [PATCH 32/54] Log event chains via EAV approach --- .../analysis_extract_data.py | 27 ++++- .../scenario_generate_chains.py | 4 +- src/tlo/analysis/utils.py | 111 ++++++++++++++++++ src/tlo/events.py | 41 +++---- src/tlo/methods/hsi_event.py | 21 ++-- src/tlo/simulation.py | 40 ++----- src/tlo/util.py | 30 +++-- 7 files changed, 200 insertions(+), 74 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 7fe15f0eb4..9ee37cabef 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -11,7 +11,7 @@ import matplotlib.pyplot as plt from tlo import Date -from tlo.analysis.utils import extract_results +from tlo.analysis.utils import extract_results, extract_event_chains from datetime import datetime from collections import Counter import ast @@ -35,6 +35,27 @@ def check_if_beyond_time_range_considered(progression_properties): if progression_properties[key] > end_date: print("Beyond time range considered, need at least ",progression_properties[key]) +def print_filtered_df(df): + """ + Prints rows of the DataFrame excluding EventName 'Initialise' and 'Birth'. + """ + pd.set_option('display.max_colwidth', None) + filtered = df#[~df['EventName'].isin(['StartOfSimulation', 'Birth'])] + + dict_cols = ["Info"] + max_items = 2 + # Step 2: Truncate dictionary columns for display + if dict_cols is not None: + for col in dict_cols: + def truncate_dict(d): + if isinstance(d, dict): + items = list(d.items())[:max_items] # keep only first `max_items` + return dict(items) + return d + filtered[col] = filtered[col].apply(truncate_dict) + print(filtered) + + def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ): """Produce standard set of plots describing the effect of each TREATMENT_ID. - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. @@ -43,6 +64,10 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) + individual_event_chains = extract_event_chains(results_folder) + print_filtered_df(individual_event_chains[0]) + exit(-1) + eval_env = { 'datetime': datetime, # Add the datetime class to the eval environment 'pd': pd, # Add pandas to handle Timestamp diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index e9291a50ce..6cfbd040fa 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -53,11 +53,11 @@ def __init__(self): super().__init__() self.seed = 42 self.start_date = Date(2010, 1, 1) - self.end_date = self.start_date + pd.DateOffset(months=18) + self.end_date = self.start_date + pd.DateOffset(months=1) self.pop_size = 1000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 1 + self.runs_per_draw = 3 self.generate_event_chains = True def log_configuration(self): diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index e605400332..f762f1eb92 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -341,6 +341,117 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: _concat = pd.concat(res, axis=1) _concat.columns.names = ['draw', 'run'] # name the levels of the columns multi-index return _concat + + +import pandas as pd + +def unpack_dict_rows(df): + """ + Reconstruct a full dataframe from rows whose columns contain dictionaries + mapping local-row-index → value. Preserves original column order. + """ + original_cols = ['E', 'EventDate', 'EventName', 'A', 'V'] + reconstructed_rows = [] + + for _, row in df.iterrows(): + # Determine how many rows this block has (using the first dict column) + first_dict_col = next(col for col in original_cols if isinstance(row[col], dict)) + block_length = len(row[first_dict_col]) + + # Build each reconstructed row + for i in range(block_length): + new_row = {} + for col in original_cols: + cell = row[col] + if not isinstance(cell, dict): + raise ValueError(f"Column {col} does not contain a dictionary") + new_row[col] = cell.get(str(i)) + reconstructed_rows.append(new_row) + + # Build DataFrame and enforce the original column order + out = pd.DataFrame(reconstructed_rows)[original_cols] + return out.reset_index(drop=True) + + +def print_filtered_df(df): + """ + Prints rows of the DataFrame excluding EventName 'Initialise' and 'Birth'. + """ + pd.set_option('display.max_colwidth', None) + filtered = df#[~df['EventName'].isin(['StartOfSimulation', 'Birth'])] + + dict_cols = ["Info"] + max_items = 2 + # Step 2: Truncate dictionary columns for display + if dict_cols is not None: + for col in dict_cols: + def truncate_dict(d): + if isinstance(d, dict): + items = list(d.items())[:max_items] # keep only first `max_items` + return dict(items) + return d + filtered[col] = filtered[col].apply(truncate_dict) + print(filtered) + + +def extract_event_chains(results_folder: Path, + ) -> dict: + """Utility function to collect chains of events. Individuals across runs of the same draw will be combined into unique df. + Returns dictionary where keys are draws, and each draw is associated with a dataframe of format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines A&Vs for a particular individual + date + event name combination. + """ + module = 'tlo.simulation' + key = 'event_chains' + + # get number of draws and numbers of runs + info = get_scenario_info(results_folder) + + # Collect results from each draw/run. Individuals across runs of the same draw will be combined into unique df. + res = dict() + + for draw in range(info['number_of_draws']): + + # All individuals in same draw will be combined across runs, so their ID will be offset. + dfs_from_runs = [] + ID_offset = 0 + + for run in range(info['runs_per_draw']): + + try: + df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] + del df['date'] + recon = unpack_dict_rows(df) + # For now convert value to string in all cases to facilitate manipulation. This can be reversed later. + recon['V'] = recon['V'].apply(str) + # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes (e.g. {a1:v1, a2:v2, a3:v3, ...} ) + df_collapsed = ( + recon.groupby(['E', 'EventDate', 'EventName']) + .apply(lambda g: dict(zip(g['A'], g['V']))) + .reset_index(name='Info') + ) + df_final = df_collapsed.sort_values(by=['E','EventDate'], ascending=True).reset_index(drop=True) + birth_count = (df_final['EventName'] == 'Birth').sum() + + print("Birth count for run ", run, "is ", birth_count) + df_final['E'] = df_final['E'] + ID_offset + + # Calculate ID offset for next run + ID_offset = (max(df_final['E']) + 1) + + # Append these chains to list + dfs_from_runs.append(df_final) + + except KeyError: + # Some logs could not be found - probably because this run failed. + # Simply to not append anything to the df collecting chains. + print("Run failed") + + # Combine all dfs into a single DataFrame + res[draw] = pd.concat(dfs_from_runs, ignore_index=True) + + # Optionally, sort by 'E' and 'EventDate' after combining + res[draw] = res[draw].sort_values(by=['E', 'EventDate']).reset_index(drop=True) + + return res def summarize(results: pd.DataFrame, only_mean: bool = False, collapse_columns: bool = False) -> pd.DataFrame: diff --git a/src/tlo/events.py b/src/tlo/events.py index 9e9865cdad..ba91218dbc 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,7 +11,7 @@ import pandas as pd -from tlo.util import FACTOR_POP_DICT, convert_dict_into_eav +from tlo.util import convert_chain_links_into_EAV import copy @@ -139,9 +139,8 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Create a dictionary for this person # First add event info link_info = { - 'person_ID': idx, - 'event': type(self).__name__, - 'event_date': self.sim.date, + 'EventDate': self.sim.date, + 'EventName': type(self).__name__, } # Store the new values from df_after for the changed columns @@ -154,7 +153,7 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be link_info[col] = diff_mni[idx][key] # Append the event and changes to the individual key - chain_links[idx] = str(link_info) + chain_links[idx] = link_info # For individuals which only underwent changes in mni dictionary, save changes here if len(diff_mni)>0: @@ -162,15 +161,14 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be if key not in persons_changed: # If individual hadn't been previously added due to changes in pop df, add it here link_info = { - 'person_ID': key, - 'event': type(self).__name__, - 'event_date': self.sim.date, + 'EventDate': self.sim.date, + 'EventName': type(self).__name__, } for key_prop in diff_mni[key]: link_info[key_prop] = diff_mni[key][key_prop] - chain_links[key] = str(link_info) + chain_links[key] = link_info return chain_links @@ -233,12 +231,10 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, mni_instances_after = True # Create and store event for this individual, regardless of whether any property change occurred - link_info = {} - # #'person_ID' : self.target, - # 'person_ID' : self.target, - # 'event' : type(self).__name__, - # 'event_date' : self.sim.date, - #} + link_info = { + 'EventDate' : self.sim.date, + 'EventName' : type(self).__name__, + } # Store (if any) property changes as a result of the event for this individual for key in row_before.index: @@ -265,11 +261,8 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, link_info[key] = mni[self.target][key] # Else, no need to do anything - eav = convert_dict_into_eav(link_info, self.target, self.sim.date, type(self).__name__) - print(eav) - exit(-1) # Add individual to the chain links - chain_links[self.target] = str(link_info) + chain_links[self.target] = link_info else: # Target is entire population. Identify individuals for which properties have changed @@ -300,6 +293,14 @@ def run(self): if self.sim.generate_event_chains and print_chains: chain_links = self.store_chains_to_do_after_event(row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) + if chain_links: + # Convert chain_links into EAV + ednav = convert_chain_links_into_EAV(chain_links) + + logger_chain.info(key='event_chains', + data= ednav.to_dict(), + description='Links forming chains of events for simulated individuals') + """ # Create empty logger for entire pop pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals pop_dict.update(chain_links) @@ -310,7 +311,7 @@ def run(self): logger_chain.info(key='event_chains', data= pop_dict, description='Links forming chains of events for simulated individuals') - + """ class RegularEvent(Event): """An event that automatically reschedules itself at a fixed frequency.""" diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 85ac6da3e2..59b7b1f60a 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -8,7 +8,7 @@ from tlo import Date, logging from tlo.events import Event from tlo.population import Population -from tlo.util import FACTOR_POP_DICT +from tlo.util import convert_chain_links_into_EAV import pandas as pd @@ -266,9 +266,8 @@ def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, record_level = 'N/A' link_info = { - 'person_ID': self.target, - 'event' : type(self).__name__, - 'event_date' : self.sim.date, + 'EventName' : type(self).__name__, + 'EventDate' : self.sim.date, 'appt_footprint' : record_footprint, 'level' : record_level, } @@ -297,7 +296,7 @@ def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, if self.values_differ(default[key], mni[self.target][key]): link_info[key] = mni[self.target][key] - chain_links[self.target] = str(link_info) + chain_links[self.target] = link_info return chain_links @@ -325,13 +324,13 @@ def run(self, squeeze_factor): if print_chains: chain_links = self.store_chains_to_do_after_event(row_before, str(footprint), mni_row_before, mni_instances_before) - if len(chain_links)>0: - pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} - pop_dict.update(chain_links) + if chain_links: - logger_chains.info(key='event_chains', - data = pop_dict, - description='Links forming chains of events for simulated individuals') + # Convert chain_links into EAV + ednav = convert_chain_links_into_EAV(chain_links) + logger_chain.info(key='event_chains', + data = ednav, + description='Links forming chains of events for simulated individuals') return updated_appt_footprint diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index ef27fa6381..da55d42efc 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -13,7 +13,7 @@ import pandas as pd import tlo.population import numpy as np -from tlo.util import FACTOR_POP_DICT, df_to_eav +from tlo.util import df_to_EAV, convert_chain_links_into_EAV try: import dill @@ -290,26 +290,11 @@ def make_initial_population(self, *, n: int) -> None: # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. if self.generate_event_chains: - print(len(self.population.props), n) - # EAV structure to capture status of individuals at the start of the simulation - eav = df_to_eav(self.population.props, self.date, 'StartOfSimulation') - - """ - pop_dict = self.population.props.to_dict(orient='index') - - for key in pop_dict.keys(): - pop_dict[key]['person_ID'] = key - pop_dict[key] = str(pop_dict[key]) # Log as string to avoid issues around length of properties stored later - - pop_dict_full = {i: '' for i in range(FACTOR_POP_DICT)} - pop_dict_full.update(pop_dict) - - logger.info(key='event_chains', - data = pop_dict_full, - description='Links forming chains of events for simulated individuals') - """ + # EDNAV structure to capture status of individuals at the start of the simulation + ednav = df_to_EAV(self.population.props, self.date, 'StartOfSimulation') + logger.info(key='event_chains', - data = eav.to_dict(), + data = ednav.to_dict(), description='Links forming chains of events for simulated individuals') end = time.time() @@ -475,15 +460,16 @@ def do_birth(self, mother_id: int) -> int: if self.generate_event_chains: # When individual is born, store their initial properties to provide a starting point to the chain of property # changes that this individual will undergo as a result of events taking place. - prop_dict = self.population.props.loc[child_id].to_dict() - prop_dict['event'] = 'Birth' - prop_dict['event_date'] = self.date - - pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals - pop_dict[child_id] = str(prop_dict) # Convert to string to avoid issue of length + link_info = self.population.props.loc[child_id].to_dict() + link_info['EventName'] = 'Birth' + link_info['EventDate'] = self.date + chain_links = {} + chain_links[child_id] = link_info # Convert to string to avoid issue of length + ednav = convert_chain_links_into_EAV(chain_links) + logger.info(key='event_chains', - data = pop_dict, + data = ednav.to_dict(), description='Links forming chains of events for simulated individuals') return child_id diff --git a/src/tlo/util.py b/src/tlo/util.py index e83e19baab..ee29445e9a 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -13,7 +13,6 @@ # Default mother_id value, assigned to individuals initialised as adults at the start of the simulation. DEFAULT_MOTHER_ID = -1e7 -FACTOR_POP_DICT = 50000 def create_age_range_lookup(min_age: int, max_age: int, range_size: int = 5) -> (list, Dict[int, str]): @@ -94,25 +93,30 @@ def transition_states(initial_series: pd.Series, prob_matrix: pd.DataFrame, rng: return final_states -def df_to_eav(df, date, event_name): +def df_to_EAV(df, date, event_name): """Function to convert dataframe into EAV""" eav = df.stack().reset_index() eav.columns = ['E', 'A', 'V'] - eav['Date'] = date - eav['NameEvent'] = event_name - eav = eav[["E", "Date", "NameEvent", "A", "V"]] + eav['EventDate'] = date + eav['EventName'] = event_name + eav = eav[["E", "EventDate", "EventName", "A", "V"]] return eav -def convert_dict_into_eav(link_info, target, date, event_name): - "Function to convert link info in the form of dictionary into an EAV" - eav = pd.DataFrame(list(link_info.items()), columns=['A', 'V']) - eav.columns = ['A', 'V'] - eav['E'] = target - eav['Date'] = date - eav['NameEvent'] = event_name - eav = eav[['E', 'Date', 'NameEvent', 'A', 'V']] +def convert_chain_links_into_EAV(chain_links): + df = pd.DataFrame.from_dict(chain_links, orient="index") + id_cols = ["EventDate", "EventName"] + + eav = df.reset_index().melt( + id_vars=["index"] + id_cols, # index = person ID + var_name="A", + value_name="V" + ) + + eav.rename(columns={"index": "E"}, inplace=True) + + eav = eav[["E", "EventDate", "EventName", "A", "V"]] return eav From 5234550934fd0bf156e43603d593945c66d888c0 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 21 Nov 2025 13:44:52 +0000 Subject: [PATCH 33/54] No need to store EventDate since this is already stored in logger by default --- src/tlo/analysis/utils.py | 62 ++++++++++++++++++++++++++++++++---- src/tlo/events.py | 3 -- src/tlo/methods/hsi_event.py | 1 - src/tlo/simulation.py | 1 - src/tlo/util.py | 7 ++-- 5 files changed, 59 insertions(+), 15 deletions(-) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index f762f1eb92..00a297030b 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -345,7 +345,7 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: import pandas as pd -def unpack_dict_rows(df): +def old_unpack_dict_rows(df): """ Reconstruct a full dataframe from rows whose columns contain dictionaries mapping local-row-index → value. Preserves original column order. @@ -372,6 +372,54 @@ def unpack_dict_rows(df): out = pd.DataFrame(reconstructed_rows)[original_cols] return out.reset_index(drop=True) + +def unpack_dict_rows(df, non_dict_cols=None): + """ + Reconstruct a full DataFrame from rows where most columns are dictionaries. + Non-dict columns (e.g., 'date') are propagated to all reconstructed rows. + + Parameters: + df: pd.DataFrame + non_dict_cols: list of columns that are NOT dictionaries + """ + if non_dict_cols is None: + non_dict_cols = [] + + original_cols = ['E', 'date', 'EventName', 'A', 'V'] + + reconstructed_rows = [] + + for _, row in df.iterrows(): + # Determine dict columns for this row + dict_cols = [col for col in original_cols if col not in non_dict_cols] + + if not dict_cols: + # No dict columns, just append row + reconstructed_rows.append(row.to_dict()) + continue + + # Use the first dict column to get the block length + first_dict_col = dict_cols[0] + block_length = len(row[first_dict_col]) + + # Build each expanded row + for i in range(block_length): + new_row = {} + for col in original_cols: + cell = row[col] + if col in dict_cols: + # Access the dict using string or integer keys + new_row[col] = cell.get(str(i), cell.get(i)) + else: + # Propagate non-dict value + new_row[col] = cell + reconstructed_rows.append(new_row) + + # Build DataFrame in original column order + out = pd.DataFrame(reconstructed_rows)[original_cols] + + return out.reset_index(drop=True) + def print_filtered_df(df): """ @@ -418,17 +466,19 @@ def extract_event_chains(results_folder: Path, try: df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] - del df['date'] - recon = unpack_dict_rows(df) + + recon = unpack_dict_rows(df, ['date']) + print(recon) + #del recon['EventDate'] # For now convert value to string in all cases to facilitate manipulation. This can be reversed later. recon['V'] = recon['V'].apply(str) # Collapse into 'E', 'EventDate', 'EventName', 'Info' format where 'Info' is dict listing attributes (e.g. {a1:v1, a2:v2, a3:v3, ...} ) df_collapsed = ( - recon.groupby(['E', 'EventDate', 'EventName']) + recon.groupby(['E', 'date', 'EventName']) .apply(lambda g: dict(zip(g['A'], g['V']))) .reset_index(name='Info') ) - df_final = df_collapsed.sort_values(by=['E','EventDate'], ascending=True).reset_index(drop=True) + df_final = df_collapsed.sort_values(by=['E','date'], ascending=True).reset_index(drop=True) birth_count = (df_final['EventName'] == 'Birth').sum() print("Birth count for run ", run, "is ", birth_count) @@ -449,7 +499,7 @@ def extract_event_chains(results_folder: Path, res[draw] = pd.concat(dfs_from_runs, ignore_index=True) # Optionally, sort by 'E' and 'EventDate' after combining - res[draw] = res[draw].sort_values(by=['E', 'EventDate']).reset_index(drop=True) + res[draw] = res[draw].sort_values(by=['E', 'date']).reset_index(drop=True) return res diff --git a/src/tlo/events.py b/src/tlo/events.py index ba91218dbc..4b62c16932 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -139,7 +139,6 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Create a dictionary for this person # First add event info link_info = { - 'EventDate': self.sim.date, 'EventName': type(self).__name__, } @@ -161,7 +160,6 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be if key not in persons_changed: # If individual hadn't been previously added due to changes in pop df, add it here link_info = { - 'EventDate': self.sim.date, 'EventName': type(self).__name__, } @@ -232,7 +230,6 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, # Create and store event for this individual, regardless of whether any property change occurred link_info = { - 'EventDate' : self.sim.date, 'EventName' : type(self).__name__, } diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 59b7b1f60a..d59f8e2404 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -267,7 +267,6 @@ def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, link_info = { 'EventName' : type(self).__name__, - 'EventDate' : self.sim.date, 'appt_footprint' : record_footprint, 'level' : record_level, } diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index da55d42efc..35f6818f66 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -462,7 +462,6 @@ def do_birth(self, mother_id: int) -> int: # changes that this individual will undergo as a result of events taking place. link_info = self.population.props.loc[child_id].to_dict() link_info['EventName'] = 'Birth' - link_info['EventDate'] = self.date chain_links = {} chain_links[child_id] = link_info # Convert to string to avoid issue of length diff --git a/src/tlo/util.py b/src/tlo/util.py index ee29445e9a..d678aa09ef 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -97,16 +97,15 @@ def df_to_EAV(df, date, event_name): """Function to convert dataframe into EAV""" eav = df.stack().reset_index() eav.columns = ['E', 'A', 'V'] - eav['EventDate'] = date eav['EventName'] = event_name - eav = eav[["E", "EventDate", "EventName", "A", "V"]] + eav = eav[["E", "EventName", "A", "V"]] return eav def convert_chain_links_into_EAV(chain_links): df = pd.DataFrame.from_dict(chain_links, orient="index") - id_cols = ["EventDate", "EventName"] + id_cols = ["EventName"] eav = df.reset_index().melt( id_vars=["index"] + id_cols, # index = person ID @@ -116,7 +115,7 @@ def convert_chain_links_into_EAV(chain_links): eav.rename(columns={"index": "E"}, inplace=True) - eav = eav[["E", "EventDate", "EventName", "A", "V"]] + eav = eav[["E", "EventName", "A", "V"]] return eav From 2f20cb392a9aaee1c8d004a82e4f31957d2130b8 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 21 Nov 2025 14:45:16 +0000 Subject: [PATCH 34/54] Check if PregnancySupervisor is included before considering in chain of events production --- src/tlo/events.py | 101 ++++++++++++++++++++--------------- src/tlo/methods/hsi_event.py | 53 +++++++++--------- 2 files changed, 87 insertions(+), 67 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 4b62c16932..f03f150f92 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -122,7 +122,10 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Create a mask of where values are different diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) - diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) + if 'PregnancySupervisor' in self.sim.modules: + diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) + else: + diff_mni = [] # Create an empty list to store changes for each of the individuals chain_links = {} @@ -154,19 +157,20 @@ def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_be # Append the event and changes to the individual key chain_links[idx] = link_info - # For individuals which only underwent changes in mni dictionary, save changes here - if len(diff_mni)>0: - for key in diff_mni: - if key not in persons_changed: - # If individual hadn't been previously added due to changes in pop df, add it here - link_info = { - 'EventName': type(self).__name__, - } - - for key_prop in diff_mni[key]: - link_info[key_prop] = diff_mni[key][key_prop] + if 'PregnancySupervisor' in self.sim.modules: + # For individuals which only underwent changes in mni dictionary, save changes here + if len(diff_mni)>0: + for key in diff_mni: + if key not in persons_changed: + # If individual hadn't been previously added due to changes in pop df, add it here + link_info = { + 'EventName': type(self).__name__, + } - chain_links[key] = link_info + for key_prop in diff_mni[key]: + link_info[key_prop] = diff_mni[key][key_prop] + + chain_links[key] = link_info return chain_links @@ -197,17 +201,23 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) # Check if individual is already in mni dictionary, if so copy her original status - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_before = True - mni_row_before = mni[self.target].copy() + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if self.target in mni: + mni_instances_before = True + mni_row_before = mni[self.target].copy() + else: + mni_row_before = None else: # This will be a population-wide event. In order to find individuals for which this led to # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. df_before = self.sim.population.props.copy() - entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + if 'PregnancySupervisor' in self.sim.modules: + entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + entire_mni_before = None return print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before @@ -224,9 +234,12 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, # Check if individual is in mni after the event mni_instances_after = False - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_after = True + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if self.target in mni: + mni_instances_after = True + else: + mni_instances_after = None # Create and store event for this individual, regardless of whether any property change occurred link_info = { @@ -237,26 +250,27 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, for key in row_before.index: if row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe link_info[key] = row_after[key] - - # Now check and store changes in the mni dictionary, accounting for following cases: - # Individual is in mni dictionary before and after - if mni_instances_before and mni_instances_after: - for key in mni_row_before: - if self.mni_values_differ(mni_row_before[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Individual is only in mni dictionary before event - elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in mni_row_before: - if self.mni_values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in default: - if self.mni_values_differ(default[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Else, no need to do anything + + if 'PregnancySupervisor' in self.sim.modules: + # Now check and store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if mni_instances_before and mni_instances_after: + for key in mni_row_before: + if self.mni_values_differ(mni_row_before[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Individual is only in mni dictionary before event + elif mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in mni_row_before: + if self.mni_values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in default: + if self.mni_values_differ(default[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Else, no need to do anything # Add individual to the chain links chain_links[self.target] = link_info @@ -267,7 +281,10 @@ def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, # Population frame after event df_after = self.sim.population.props - entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + if 'PregnancySupervisor' in self.sim.modules: + entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + entire_mni_after = None # Create and store the event and dictionary of changes for affected individuals chain_links = self.compare_population_dataframe_and_mni(df_before, df_after, entire_mni_before, entire_mni_after) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 7d960077fc..edb5d3df3b 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -239,10 +239,11 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, dict, bool]: row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) # Check if individual is in mni dictionary before the event, if so store its original status - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_before = True - mni_row_before = mni[self.target].copy() + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if self.target in mni: + mni_instances_before = True + mni_row_before = mni[self.target].copy() else: print("ERROR: there shouldn't be pop-wide HSI event") @@ -259,9 +260,10 @@ def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) mni_instances_after = False - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_after = True + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if self.target in mni: + mni_instances_after = True # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level # will be stored regardless of whether individual experienced property changes or not. @@ -285,24 +287,25 @@ def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] - # Now store changes in the mni dictionary, accounting for following cases: - # Individual is in mni dictionary before and after - if mni_instances_before and mni_instances_after: - for key in mni_row_before: - if self.values_differ(mni_row_before[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Individual is only in mni dictionary before event - elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in mni_row_before: - if self.values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in default: - if self.values_differ(default[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] + if 'PregnancySupervisor' in self.sim.modules: + # Now store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if mni_instances_before and mni_instances_after: + for key in mni_row_before: + if self.values_differ(mni_row_before[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] + # Individual is only in mni dictionary before event + elif mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in mni_row_before: + if self.values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in default: + if self.values_differ(default[key], mni[self.target][key]): + link_info[key] = mni[self.target][key] chain_links[self.target] = link_info From 1b838235e2ca27e82412f37d302ff40adccbeba0 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Sat, 22 Nov 2025 09:42:59 +0000 Subject: [PATCH 35/54] Remove old util fnc --- src/tlo/analysis/utils.py | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index bc8784ae66..fc0d374fd1 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -364,36 +364,6 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: _concat = pd.concat(res, axis=1) _concat.columns.names = ['draw', 'run'] # name the levels of the columns multi-index return _concat - - -import pandas as pd - -def old_unpack_dict_rows(df): - """ - Reconstruct a full dataframe from rows whose columns contain dictionaries - mapping local-row-index → value. Preserves original column order. - """ - original_cols = ['E', 'EventDate', 'EventName', 'A', 'V'] - reconstructed_rows = [] - - for _, row in df.iterrows(): - # Determine how many rows this block has (using the first dict column) - first_dict_col = next(col for col in original_cols if isinstance(row[col], dict)) - block_length = len(row[first_dict_col]) - - # Build each reconstructed row - for i in range(block_length): - new_row = {} - for col in original_cols: - cell = row[col] - if not isinstance(cell, dict): - raise ValueError(f"Column {col} does not contain a dictionary") - new_row[col] = cell.get(str(i)) - reconstructed_rows.append(new_row) - - # Build DataFrame and enforce the original column order - out = pd.DataFrame(reconstructed_rows)[original_cols] - return out.reset_index(drop=True) def unpack_dict_rows(df, non_dict_cols=None): From f4cf120a60bf6da13fecb2d66dfe05fd4f495aff Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Sat, 22 Nov 2025 09:44:08 +0000 Subject: [PATCH 36/54] Overwrite any changes to hiv and tb file --- src/tlo/methods/hiv.py | 35 --------------- src/tlo/methods/tb.py | 96 ++++++++++-------------------------------- 2 files changed, 23 insertions(+), 108 deletions(-) diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index 7ecc741c25..8b40e37a34 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -1682,37 +1682,6 @@ def do_at_generic_first_appt( # Main Polling Event # --------------------------------------------------------------------------- -class HivPollingEventForDataGeneration(RegularEvent, PopulationScopeEventMixin): - """ The HIV Polling Events for Data Generation - * Ensures that - """ - - def __init__(self, module): - super().__init__( - module, frequency=DateOffset(years=120) - ) # repeats every 12 months, but this can be changed - - def apply(self, population): - - df = population.props - - # Make everyone who is alive and not infected (no-one should be) susceptible - susc_idx = df.loc[ - df.is_alive - & ~df.hv_inf - ].index - - n_susceptible = len(susc_idx) - print("Number of individuals susceptible", n_susceptible) - # Schedule the date of infection for each new infection: - for i in susc_idx: - date_of_infection = self.sim.date + pd.DateOffset( - # Ensure that individual will be infected before end of sim - days=self.module.rng.randint(0, 365*(int(self.sim.end_date.year - self.sim.date.year)+1)) - ) - self.sim.schedule_event( - HivInfectionEvent(self.module, i), date_of_infection - ) class HivRegularPollingEvent(RegularEvent, PopulationScopeEventMixin): """ The HIV Regular Polling Events @@ -1734,7 +1703,6 @@ def apply(self, population): fraction_of_year_between_polls = self.frequency.months / 12 beta = p["beta"] * fraction_of_year_between_polls - # ----------------------------------- HORIZONTAL TRANSMISSION ----------------------------------- def horizontal_transmission(to_sex, from_sex): # Count current number of alive 15-80 year-olds at risk of transmission @@ -1810,7 +1778,6 @@ def horizontal_transmission(to_sex, from_sex): HivInfectionEvent(self.module, idx), date_of_infection ) - # ----------------------------------- SPONTANEOUS TESTING ----------------------------------- def spontaneous_testing(current_year): @@ -1935,8 +1902,6 @@ def vmmc_for_child(): vmmc_for_child() - - # --------------------------------------------------------------------------- # Natural History Events # --------------------------------------------------------------------------- diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 71361a7951..d9ba7309e0 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -864,31 +864,29 @@ def initialise_population(self, population): df["tb_on_ipt"] = False df["tb_date_ipt"] = pd.NaT - # # ------------------ infection status ------------------ # - if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None: - # WHO estimates of active TB for 2010 to get infected initial population - # don't need to scale or include treated proportion as no-one on treatment yet - inc_estimates = p["who_incidence_estimates"] - incidence_year = (inc_estimates.loc[ - (inc_estimates.year == self.sim.date.year), "incidence_per_100k" - ].values[0]) / 100_000 - - incidence_year = incidence_year * p["scaling_factor_WHO"] - - self.assign_active_tb( - population, - strain="ds", - incidence=incidence_year) - - self.assign_active_tb( - population, - strain="mdr", - incidence=incidence_year * p['prop_mdr2010']) - - self.send_for_screening_general( - population - ) # send some baseline population for screening + # WHO estimates of active TB for 2010 to get infected initial population + # don't need to scale or include treated proportion as no-one on treatment yet + inc_estimates = p["who_incidence_estimates"] + incidence_year = (inc_estimates.loc[ + (inc_estimates.year == self.sim.date.year), "incidence_per_100k" + ].values[0]) / 100_000 + + incidence_year = incidence_year * p["scaling_factor_WHO"] + + self.assign_active_tb( + population, + strain="ds", + incidence=incidence_year) + + self.assign_active_tb( + population, + strain="mdr", + incidence=incidence_year * p['prop_mdr2010']) + + self.send_for_screening_general( + population + ) # send some baseline population for screening def initialise_simulation(self, sim): """ @@ -901,10 +899,8 @@ def initialise_simulation(self, sim): sim.schedule_event(TbActiveEvent(self), sim.date) sim.schedule_event(TbRegularEvents(self), sim.date) sim.schedule_event(TbSelfCureEvent(self), sim.date) - sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) - # 2) log at the end of the year # Optional: Schedule the scale-up of programs if self.parameters["type_of_scaleup"] != 'none': @@ -1406,53 +1402,6 @@ def is_subset(col_for_set, col_for_subset): # # TB infection event # # --------------------------------------------------------------------------- -class TbActiveCasePollGenerateData(RegularEvent, PopulationScopeEventMixin): - """The Tb Regular Poll Event for Data Generation for assigning active infections - * selects everyone to develop an active infection and schedules onset of active tb - sometime during the simulation - """ - - def __init__(self, module): - super().__init__(module, frequency=DateOffset(years=120)) - - def apply(self, population): - - df = population.props - now = self.sim.date - rng = self.module.rng - # Make everyone who is alive and not infected (no-one should be) susceptible - susc_idx = df.loc[ - df.is_alive - & (df.tb_inf != "active") - ].index - - len(susc_idx) - - middle_index = len(susc_idx) // 2 - - # Will equally split two strains among the population - list_ds = susc_idx[:middle_index] - list_mdr = susc_idx[middle_index:] - - # schedule onset of active tb. This will be equivalent to the "Onset", so it - # doesn't matter how long after we have decided which infection this is. - for person_id in list_ds: - date_progression = now + pd.DateOffset( - # At some point during their lifetime, this person will develop TB - days=self.module.rng.randint(0, 365*(int(self.sim.end_date.year - self.sim.date.year)+1)) - ) - # set date of active tb - properties will be updated at TbActiveEvent poll daily - df.at[person_id, "tb_scheduled_date_active"] = date_progression - df.at[person_id, "tb_strain"] = "ds" - - for person_id in list_mdr: - date_progression = now + pd.DateOffset( - days=rng.randint(0, 365*int(self.sim.end_date.year - self.sim.start_date.year + 1)) - ) - # set date of active tb - properties will be updated at TbActiveEvent poll daily - df.at[person_id, "tb_scheduled_date_active"] = date_progression - df.at[person_id, "tb_strain"] = "mdr" - class TbActiveCasePoll(RegularEvent, PopulationScopeEventMixin): """The Tb Regular Poll Event for assigning active infections @@ -1527,6 +1476,7 @@ def apply(self, population): self.module.update_parameters_for_program_scaleup() # note also culture test used in target/max scale-up in place of clinical dx + class TbActiveEvent(RegularEvent, PopulationScopeEventMixin): """ * check for those with dates of active tb onset within last time-period From 29dd543c2c182a724c7c9099bdeb5cf5ec439363 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Sat, 22 Nov 2025 09:45:09 +0000 Subject: [PATCH 37/54] Overwrite any changes to demography file --- src/tlo/methods/demography.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py index b8fa40b7df..2acaad75eb 100644 --- a/src/tlo/methods/demography.py +++ b/src/tlo/methods/demography.py @@ -324,10 +324,9 @@ def initialise_simulation(self, sim): # Launch the repeating event that will store statistics about the population structure sim.schedule_event(DemographyLoggingEvent(self), sim.date) - if sim.generate_event_chains is False: - # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately - self.other_death_poll = OtherDeathPoll(self) - sim.schedule_event(self.other_death_poll, sim.date) + # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately + self.other_death_poll = OtherDeathPoll(self) + sim.schedule_event(self.other_death_poll, sim.date) # Log the initial population scaling-factor (to the logger of this module and that of `tlo.methods.population`) for _logger in (logger, logger_scale_factor): From 33f1143e1b2c46113c498a3fde5fe0799a2a6be7 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Sat, 22 Nov 2025 18:00:52 +0000 Subject: [PATCH 38/54] Remove outdated test related to RTI data harvesting --- tests/test_rti.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tests/test_rti.py b/tests/test_rti.py index b696a249f5..711215b8cf 100644 --- a/tests/test_rti.py +++ b/tests/test_rti.py @@ -25,17 +25,6 @@ end_date = Date(2012, 1, 1) popsize = 1000 -@pytest.mark.slow -def test_data_harvesting(seed): - """ - This test runs a simulation with a functioning health system with full service availability and no set - constraints - """ - # create sim object - sim = create_basic_rti_sim(popsize, seed) - # run simulation - sim.simulate(end_date=end_date) - exit(-1) def check_dtypes(simulation): # check types of columns in dataframe, check they are the same, list those that aren't From af477c29485ee7b2d4d380753d9846b7d93c19c5 Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Mon, 24 Nov 2025 09:51:58 +0000 Subject: [PATCH 39/54] Add a very simple synchronous notification dispatcher - avoided using the more typical naming `event` or `signal` because they are already used. --- src/tlo/notify.py | 64 ++++++++++++++++++++++++++++++++++++++++++++ tests/test_notify.py | 23 ++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 src/tlo/notify.py create mode 100644 tests/test_notify.py diff --git a/src/tlo/notify.py b/src/tlo/notify.py new file mode 100644 index 0000000000..28765f5afd --- /dev/null +++ b/src/tlo/notify.py @@ -0,0 +1,64 @@ +""" +A dead simple synchronous notification dispatcher. + +Usage +----- +# In the notifying class/module +from tlo.notify import notifier + +notifier.dispatch("simulation.on_start", data={"one": 1, "two": 2}) + +# In the listening class/module +from tlo.notify import notifier + +def on_notification(data): + print("Received notification:", data) + +notifier.add_listener("simulation.on_start", on_notification) +""" + +class Notifier: + """ + A simple synchronous notification dispatcher supporting listeners. + """ + + def __init__(self): + self.listeners = {} + + def add_listener(self, notification_key, listener): + """ + Register a listener for a specific notification. + + :param notification_key: The identifier to listen for. + :param listener: A callable to be invoked when the notification is dispatched. + """ + if notification_key not in self.listeners: + self.listeners[notification_key] = [] + self.listeners[notification_key].append(listener) + + def remove_listener(self, notification_key, listener): + """ + Remove a previously registered listener for a notification. + + :param notification_key: The identifier. + :param listener: The listener callable to remove. + """ + if notification_key in self.listeners: + self.listeners[notification_key].remove(listener) + if not self.listeners[notification_key]: + del self.listeners[notification_key] + + def dispatch(self, notification_key, data=None): + """ + Dispatch a notification to all registered listeners. + + :param notification_key: The identifier. + :param data: Optional data to pass to each listener. + """ + if notification_key in self.listeners: + for listener in list(self.listeners[notification_key]): + listener(data) + +# Create a global dispatcher instance +notifier = Notifier() + diff --git a/tests/test_notify.py b/tests/test_notify.py new file mode 100644 index 0000000000..e71e2acb9a --- /dev/null +++ b/tests/test_notify.py @@ -0,0 +1,23 @@ +from tlo.notify import notifier + +def test_notifier(): + # in listening code + received_data = [] + + def callback(data): + received_data.append(data) + + notifier.add_listener("test.signal", callback) + + # in emitting code + notifier.dispatch("test.signal", data={"value": 42}) + + assert len(received_data) == 1 + assert received_data[0] == {"value": 42} + + # Unsubscribe and test no further calls + notifier.remove_listener("test.signal", callback) + notifier.dispatch("test.signal", data={"value": 100}) + + assert len(received_data) == 1 # No new data + From 01e35d0079877dd7d12cdbd2cb6f7b285fef863f Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Mon, 24 Nov 2025 10:02:59 +0000 Subject: [PATCH 40/54] Fix comment --- src/tlo/notify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/notify.py b/src/tlo/notify.py index 28765f5afd..325131a1c7 100644 --- a/src/tlo/notify.py +++ b/src/tlo/notify.py @@ -59,6 +59,6 @@ def dispatch(self, notification_key, data=None): for listener in list(self.listeners[notification_key]): listener(data) -# Create a global dispatcher instance +# Create a global notifier instance notifier = Notifier() From 9f23fcbeb46e2af5b6a1c6334aa579574ec18b66 Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Mon, 24 Nov 2025 10:23:00 +0000 Subject: [PATCH 41/54] Fix formatting --- src/tlo/notify.py | 3 ++- tests/test_notify.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/tlo/notify.py b/src/tlo/notify.py index 325131a1c7..2906fa712a 100644 --- a/src/tlo/notify.py +++ b/src/tlo/notify.py @@ -17,6 +17,7 @@ def on_notification(data): notifier.add_listener("simulation.on_start", on_notification) """ + class Notifier: """ A simple synchronous notification dispatcher supporting listeners. @@ -59,6 +60,6 @@ def dispatch(self, notification_key, data=None): for listener in list(self.listeners[notification_key]): listener(data) + # Create a global notifier instance notifier = Notifier() - diff --git a/tests/test_notify.py b/tests/test_notify.py index e71e2acb9a..ad5e828bbf 100644 --- a/tests/test_notify.py +++ b/tests/test_notify.py @@ -1,5 +1,6 @@ from tlo.notify import notifier + def test_notifier(): # in listening code received_data = [] @@ -20,4 +21,3 @@ def callback(data): notifier.dispatch("test.signal", data={"value": 100}) assert len(received_data) == 1 # No new data - From 5ff53bb7e104e46969199dbfefc15e3fccc02eec Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Mon, 24 Nov 2025 12:23:49 +0000 Subject: [PATCH 42/54] Remove unnecessary list wrap --- src/tlo/notify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/notify.py b/src/tlo/notify.py index 2906fa712a..48c46b82b4 100644 --- a/src/tlo/notify.py +++ b/src/tlo/notify.py @@ -57,7 +57,7 @@ def dispatch(self, notification_key, data=None): :param data: Optional data to pass to each listener. """ if notification_key in self.listeners: - for listener in list(self.listeners[notification_key]): + for listener in self.listeners[notification_key]: listener(data) From 16f5e6701b03e826830352eeef8657991eae94bd Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 24 Nov 2025 17:08:51 +0000 Subject: [PATCH 43/54] Use broadcaster to collect events. Need to expand to include HSI events --- src/tlo/events.py | 11 + src/tlo/methods/collect_event_chains.py | 281 ++++++++++++++++++++++++ src/tlo/methods/fullmodel.py | 2 + src/tlo/simulation.py | 5 + 4 files changed, 299 insertions(+) create mode 100644 src/tlo/methods/collect_event_chains.py diff --git a/src/tlo/events.py b/src/tlo/events.py index f03f150f92..dce44656bd 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,6 +11,7 @@ import pandas as pd +from tlo.notify import notifier from tlo.util import convert_chain_links_into_EAV import copy @@ -296,7 +297,12 @@ def run(self): """Make the event happen.""" # Collect relevant information before event takes place + # If statement outside or inside dispatch notification? if self.sim.generate_event_chains: + + # Dispatch notification that event is about to run + notifier.dispatch("event_about_to_run", data={"target": self.target, "EventName": type(self).__name__}) + print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before = self.store_chains_to_do_before_event() self.apply(self.target) @@ -305,6 +311,11 @@ def run(self): # Collect event info + meaningful property changes of individuals. Combined, these will constitute a 'link' # in the individual's event chain. if self.sim.generate_event_chains and print_chains: + + print("About to pass") + # Dispatch notification that event is about to run + notifier.dispatch("event_has_just_ran", data={"target": self.target, "EventName": type(self).__name__}) + chain_links = self.store_chains_to_do_after_event(row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) if chain_links: diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py new file mode 100644 index 0000000000..553e095b0b --- /dev/null +++ b/src/tlo/methods/collect_event_chains.py @@ -0,0 +1,281 @@ +from tlo.notify import notifier + +from pathlib import Path +from typing import Optional +from tlo import Module, logging, population +from tlo.population import Population +import pandas as pd + +from tlo.util import convert_chain_links_into_EAV + +import copy + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +class CollectEventChains(Module): + + def __init__(self, name=None): + super().__init__(name) + + # This is how I am passing data from fnc taking place before event to the one after + # It doesn't seem very elegant but not sure how else to go about it + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} + + def initialise_simulation(self, sim): + notifier.add_listener("event_about_to_run", self.on_notification_event_about_to_run) + notifier.add_listener("event_has_just_ran", self.on_notification_event_has_just_ran) + + def read_parameters(self, resourcefilepath: Optional[Path] = None): + pass + + def initialise_population(self, population): + pass + + def on_birth(self, mother, child): + pass + + def on_notification_sim_about_to_start(self,data): + pass + + def on_notification_event_about_to_run(self, data): + """Do this when notified that an event is about to run. This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ + print("This is the data I received ", data) + + # Initialise these variables + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} + + print("My Modules") + print(self.sim.modules.keys()) + # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore + if all(sub not in str(data['EventName']) for sub in self.sim.generate_event_chains_ignore_events): + + # Will eventually use this once I can actually GET THE NAME OF THE SELF + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + + self.print_chains = True + + # Target is single individual + if not isinstance(data["target"], Population): + + # Save row for comparison after event has occurred + self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) + + # Check if individual is already in mni dictionary, if so copy her original status + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + self.mni_instances_before = True + self.mni_row_before = mni[data['target']].copy() + else: + self.mni_row_before = None + + else: + + # This will be a population-wide event. In order to find individuals for which this led to + # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. + self.df_before = self.sim.population.props.copy() + if 'PregnancySupervisor' in self.sim.modules: + self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + self.entire_mni_before = None + + return + + + def on_notification_event_has_just_ran(self, data): + """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ + print("This is the data I received ", data) + + chain_links = {} + + # Target is single individual + if not isinstance(data["target"], Population): + + # Copy full new status for individual + row_after = self.sim.population.props.loc[abs(data['target'])].fillna(-99999) + + # Check if individual is in mni after the event + mni_instances_after = False + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + mni_instances_after = True + else: + mni_instances_after = None + + # Create and store event for this individual, regardless of whether any property change occurred + link_info = { + 'EventName' : data['EventName'], + } + + # Store (if any) property changes as a result of the event for this individual + for key in self.row_before.index: + if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe + link_info[key] = row_after[key] + + if 'PregnancySupervisor' in self.sim.modules: + # Now check and store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if self.mni_instances_before and mni_instances_after: + for key in self.mni_row_before: + if self.mni_values_differ(mni_row_before[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + # Individual is only in mni dictionary before event + elif self.mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in self.mni_row_before: + if self.mni_values_differ(mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not self.mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in default: + if self.mni_values_differ(default[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + # Else, no need to do anything + + # Add individual to the chain links + chain_links[data['target']] = link_info + + else: + # Target is entire population. Identify individuals for which properties have changed + # and store their changes. + + # Population frame after event + df_after = self.sim.population.props + if 'PregnancySupervisor' in self.sim.modules: + entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + entire_mni_after = None + + # Create and store the event and dictionary of changes for affected individuals + chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, self.entire_mni_before, entire_mni_after) + + if chain_links: + # Convert chain_links into EAV + ednav = convert_chain_links_into_EAV(chain_links) + + logger.info(key='event_chains', + data= ednav.to_dict(), + description='Links forming chains of events for simulated individuals') + + # Reset variables + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} + + return + + def mni_values_differ(self, v1, v2): + + if isinstance(v1, list) and isinstance(v2, list): + return v1 != v2 # simple element-wise comparison + + if pd.isna(v1) and pd.isna(v2): + return False # treat both NaT/NaN as equal + return v1 != v2 + + def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): + diffs = {} + + all_individuals = set(entire_mni_before.keys()) | set(entire_mni_after.keys()) + + for person in all_individuals: + if person not in entire_mni_before: # but is afterward + for key in entire_mni_after[person]: + if self.mni_values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = entire_mni_after[person][key] + + elif person not in entire_mni_after: # but is beforehand + for key in entire_mni_before[person]: + if self.mni_values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_all_mni_values[key] + + else: # person is in both + # Compare properties + for key in entire_mni_before[person]: + if self.mni_values_differ(entire_mni_before[person][key],entire_mni_after[person][key]): + if person not in diffs: + diffs[person] = {} + diffs[person][key] = entire_mni_after[person][key] + + return diffs + + def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after): + """ This function compares the population dataframe and mni dictionary before/after a population-wide event has occurred. + It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. """ + + # Create a mask of where values are different + diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) + if 'PregnancySupervisor' in self.sim.modules: + diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) + else: + diff_mni = [] + + # Create an empty list to store changes for each of the individuals + chain_links = {} + len_of_diff = len(diff_mask) + + # Loop through each row of the mask + persons_changed = [] + + for idx, row in diff_mask.iterrows(): + changed_cols = row.index[row].tolist() + + if changed_cols: # Proceed only if there are changes in the row + persons_changed.append(idx) + # Create a dictionary for this person + # First add event info + link_info = { + 'EventName': type(self).__name__, + } + + # Store the new values from df_after for the changed columns + for col in changed_cols: + link_info[col] = df_after.at[idx, col] + + if idx in diff_mni: + # This person has also undergone changes in the mni dictionary, so add these here + for key in diff_mni[idx]: + link_info[col] = diff_mni[idx][key] + + # Append the event and changes to the individual key + chain_links[idx] = link_info + + if 'PregnancySupervisor' in self.sim.modules: + # For individuals which only underwent changes in mni dictionary, save changes here + if len(diff_mni)>0: + for key in diff_mni: + if key not in persons_changed: + # If individual hadn't been previously added due to changes in pop df, add it here + link_info = { + 'EventName': type(self).__name__, + } + + for key_prop in diff_mni[key]: + link_info[key_prop] = diff_mni[key][key_prop] + + chain_links[key] = link_info + + return chain_links + + + diff --git a/src/tlo/methods/fullmodel.py b/src/tlo/methods/fullmodel.py index 3f0c79434e..3c710c7dd2 100644 --- a/src/tlo/methods/fullmodel.py +++ b/src/tlo/methods/fullmodel.py @@ -8,6 +8,7 @@ cardio_metabolic_disorders, care_of_women_during_pregnancy, cervical_cancer, + collect_event_chains, contraception, copd, demography, @@ -116,6 +117,7 @@ def fullmodel( copd.Copd, depression.Depression, epilepsy.Epilepsy, + collect_event_chains.CollectEventChains, ] return [ module_class( diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index b0c95683c1..eac1bbdc89 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -13,6 +13,10 @@ import pandas as pd import tlo.population import numpy as np +import tlo.methods.collect_event_chains + +from tlo.notify import notifier +from tlo.methods.collect_event_chains import CollectEventChains from tlo.util import df_to_EAV, convert_chain_links_into_EAV try: @@ -148,6 +152,7 @@ def __init__( # Whether simulation has been initialised self._initialised = False + def _configure_logging( self, From ebe0ebc6644f3a96bac01c7efb9f3ad47378048a Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 11:28:02 +0000 Subject: [PATCH 44/54] Use broadcasting in HSI events too --- .../parameter_values.csv | 3 + src/tlo/events.py | 276 +--------------- src/tlo/methods/collect_event_chains.py | 297 ++++++++++-------- src/tlo/methods/hsi_event.py | 155 ++------- src/tlo/simulation.py | 61 +--- 5 files changed, 209 insertions(+), 583 deletions(-) create mode 100644 resources/ResourceFile_GenerateEventChains/parameter_values.csv diff --git a/resources/ResourceFile_GenerateEventChains/parameter_values.csv b/resources/ResourceFile_GenerateEventChains/parameter_values.csv new file mode 100644 index 0000000000..82394e590b --- /dev/null +++ b/resources/ResourceFile_GenerateEventChains/parameter_values.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f68e30f87dbe757b98cea2658c8f0c40cab629c4b6825a012ce90e12a27bc612 +size 102 diff --git a/src/tlo/events.py b/src/tlo/events.py index dce44656bd..dba2f33cd5 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -4,28 +4,12 @@ from enum import Enum from typing import TYPE_CHECKING -from tlo import DateOffset, logging +from tlo import DateOffset if TYPE_CHECKING: from tlo import Simulation -import pandas as pd - from tlo.notify import notifier -from tlo.util import convert_chain_links_into_EAV - -import copy - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - -logger_chain = logging.getLogger('tlo.simulation') -logger_chain.setLevel(logging.INFO) - -logger_summary = logging.getLogger(f"{__name__}.summary") -logger_summary.setLevel(logging.INFO) - -debug_chains = True class Priority(Enum): """Enumeration for the Priority, which is used in sorting the events in the simulation queue.""" @@ -39,7 +23,6 @@ def __lt__(self, other): return self.value < other.value return NotImplemented - class Event: """Base event class, from which all others inherit. @@ -78,265 +61,20 @@ def apply(self, target): """ raise NotImplementedError - def mni_values_differ(self, v1, v2): - - if isinstance(v1, list) and isinstance(v2, list): - return v1 != v2 # simple element-wise comparison - - if pd.isna(v1) and pd.isna(v2): - return False # treat both NaT/NaN as equal - return v1 != v2 - - def compare_entire_mni_dicts(self,entire_mni_before, entire_mni_after): - diffs = {} - - all_individuals = set(entire_mni_before.keys()) | set(entire_mni_after.keys()) - - for person in all_individuals: - if person not in entire_mni_before: # but is afterward - for key in entire_mni_after[person]: - if self.mni_values_differ(entire_mni_after[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): - if person not in diffs: - diffs[person] = {} - diffs[person][key] = entire_mni_after[person][key] - - elif person not in entire_mni_after: # but is beforehand - for key in entire_mni_before[person]: - if self.mni_values_differ(entire_mni_before[person][key],self.sim.modules['PregnancySupervisor'].default_all_mni_values[key]): - if person not in diffs: - diffs[person] = {} - diffs[person][key] = self.sim.modules['PregnancySupervisor'].default_all_mni_values[key] - - else: # person is in both - # Compare properties - for key in entire_mni_before[person]: - if self.mni_values_differ(entire_mni_before[person][key],entire_mni_after[person][key]): - if person not in diffs: - diffs[person] = {} - diffs[person][key] = entire_mni_after[person][key] - - return diffs - - def compare_population_dataframe_and_mni(self,df_before, df_after, entire_mni_before, entire_mni_after): - """ This function compares the population dataframe and mni dictionary before/after a population-wide event has occurred. - It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. """ - - # Create a mask of where values are different - diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) - if 'PregnancySupervisor' in self.sim.modules: - diff_mni = self.compare_entire_mni_dicts(entire_mni_before, entire_mni_after) - else: - diff_mni = [] - - # Create an empty list to store changes for each of the individuals - chain_links = {} - len_of_diff = len(diff_mask) - - # Loop through each row of the mask - persons_changed = [] - - for idx, row in diff_mask.iterrows(): - changed_cols = row.index[row].tolist() - - if changed_cols: # Proceed only if there are changes in the row - persons_changed.append(idx) - # Create a dictionary for this person - # First add event info - link_info = { - 'EventName': type(self).__name__, - } - - # Store the new values from df_after for the changed columns - for col in changed_cols: - link_info[col] = df_after.at[idx, col] - - if idx in diff_mni: - # This person has also undergone changes in the mni dictionary, so add these here - for key in diff_mni[idx]: - link_info[col] = diff_mni[idx][key] - - # Append the event and changes to the individual key - chain_links[idx] = link_info - - if 'PregnancySupervisor' in self.sim.modules: - # For individuals which only underwent changes in mni dictionary, save changes here - if len(diff_mni)>0: - for key in diff_mni: - if key not in persons_changed: - # If individual hadn't been previously added due to changes in pop df, add it here - link_info = { - 'EventName': type(self).__name__, - } - - for key_prop in diff_mni[key]: - link_info[key_prop] = diff_mni[key][key_prop] - - chain_links[key] = link_info - - return chain_links - - - def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame, dict, dict, bool]: - """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ - - # Initialise these variables - print_chains = False - df_before = [] - row_before = pd.Series() - mni_instances_before = False - mni_row_before = {} - entire_mni_before = {} - - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - - # Will eventually use this once I can actually GET THE NAME OF THE SELF - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): - - print_chains = True - - # Target is single individual - if self.target != self.sim.population: - - # Save row for comparison after event has occurred - row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) - - # Check if individual is already in mni dictionary, if so copy her original status - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_before = True - mni_row_before = mni[self.target].copy() - else: - mni_row_before = None - - else: - - # This will be a population-wide event. In order to find individuals for which this led to - # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. - df_before = self.sim.population.props.copy() - if 'PregnancySupervisor' in self.sim.modules: - entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - else: - entire_mni_before = None - - return print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before - - def store_chains_to_do_after_event(self, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) -> dict: - """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ - - chain_links = {} - - # Target is single individual - if self.target != self.sim.population: - - # Copy full new status for individual - row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) - - # Check if individual is in mni after the event - mni_instances_after = False - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_after = True - else: - mni_instances_after = None - - # Create and store event for this individual, regardless of whether any property change occurred - link_info = { - 'EventName' : type(self).__name__, - } - - # Store (if any) property changes as a result of the event for this individual - for key in row_before.index: - if row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe - link_info[key] = row_after[key] - - if 'PregnancySupervisor' in self.sim.modules: - # Now check and store changes in the mni dictionary, accounting for following cases: - # Individual is in mni dictionary before and after - if mni_instances_before and mni_instances_after: - for key in mni_row_before: - if self.mni_values_differ(mni_row_before[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Individual is only in mni dictionary before event - elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in mni_row_before: - if self.mni_values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in default: - if self.mni_values_differ(default[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Else, no need to do anything - - # Add individual to the chain links - chain_links[self.target] = link_info - - else: - # Target is entire population. Identify individuals for which properties have changed - # and store their changes. - - # Population frame after event - df_after = self.sim.population.props - if 'PregnancySupervisor' in self.sim.modules: - entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - else: - entire_mni_after = None - - # Create and store the event and dictionary of changes for affected individuals - chain_links = self.compare_population_dataframe_and_mni(df_before, df_after, entire_mni_before, entire_mni_after) - - return chain_links - def run(self): """Make the event happen.""" - # Collect relevant information before event takes place - # If statement outside or inside dispatch notification? - if self.sim.generate_event_chains: - - # Dispatch notification that event is about to run - notifier.dispatch("event_about_to_run", data={"target": self.target, "EventName": type(self).__name__}) - - print_chains, row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before = self.store_chains_to_do_before_event() + + # Dispatch notification that event is about to run + notifier.dispatch("event.about_to_run", data={"target": self.target, "link_info" : {"EventName": type(self).__name__}}) self.apply(self.target) self.post_apply_hook() - # Collect event info + meaningful property changes of individuals. Combined, these will constitute a 'link' - # in the individual's event chain. - if self.sim.generate_event_chains and print_chains: - - print("About to pass") - # Dispatch notification that event is about to run - notifier.dispatch("event_has_just_ran", data={"target": self.target, "EventName": type(self).__name__}) - - chain_links = self.store_chains_to_do_after_event(row_before, df_before, mni_row_before, entire_mni_before, mni_instances_before) - - if chain_links: - # Convert chain_links into EAV - ednav = convert_chain_links_into_EAV(chain_links) - - logger_chain.info(key='event_chains', - data= ednav.to_dict(), - description='Links forming chains of events for simulated individuals') - """ - # Create empty logger for entire pop - pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals - pop_dict.update(chain_links) - - # Log chain_links here - if len(chain_links)>0: - - logger_chain.info(key='event_chains', - data= pop_dict, - description='Links forming chains of events for simulated individuals') - """ + # Dispatch notification that event has just ran + notifier.dispatch("event.has_just_ran", data={"target": self.target, "link_info" : {"EventName": type(self).__name__}}) + class RegularEvent(Event): """An event that automatically reschedules itself at a fixed frequency.""" diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 553e095b0b..7fb946c524 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -2,11 +2,11 @@ from pathlib import Path from typing import Optional -from tlo import Module, logging, population +from tlo import Module, Parameter, Types, logging, population from tlo.population import Population import pandas as pd -from tlo.util import convert_chain_links_into_EAV +from tlo.util import df_to_EAV, convert_chain_links_into_EAV, read_csv_files import copy @@ -27,156 +27,207 @@ def __init__(self, name=None): self.mni_row_before = {} self.entire_mni_before = {} + PARAMETERS = { + # Options within module + "generate_event_chains": Parameter( + Types.BOOL, "Whether or not we want to collect chains of events for individuals" + ), + "modules_of_interest": Parameter( + Types.LIST, "Restrict the events collected to specific modules. If *, print for all modules" + ), + "events_to_ignore": Parameter( + Types.LIST, "Events to be ignored when collecting chains" + ), + } + def initialise_simulation(self, sim): - notifier.add_listener("event_about_to_run", self.on_notification_event_about_to_run) - notifier.add_listener("event_has_just_ran", self.on_notification_event_has_just_ran) + notifier.add_listener("simulation.pop_has_been_initialised", self.on_notification_pop_has_been_initialised) + notifier.add_listener("simulation.on_birth", self.on_notification_of_birth) + notifier.add_listener("event.about_to_run", self.on_notification_event_about_to_run) + notifier.add_listener("event.has_just_ran", self.on_notification_event_has_just_ran) def read_parameters(self, resourcefilepath: Optional[Path] = None): - pass + #print("resource file path", resourcefilepath) + #self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) + self.parameters["generate_event_chains"] = True + self.parameters["modules_of_interest"] = self.sim.modules + + self.parameters["events_to_ignore"] =["AgeUpdateEvent","HealthSystemScheduler","SimplifiedBirthsPoll","DirectBirth","LifestyleEvent","TbActiveCasePollGenerateData","HivPollingEventForDataGeneration","RTIPollingEvent"] + def initialise_population(self, population): pass def on_birth(self, mother, child): + # Could the notification of birth simply take place here? pass - def on_notification_sim_about_to_start(self,data): - pass + def on_notification_pop_has_been_initialised(self, data): + # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. + # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. + if self.parameters['generate_event_chains']: + + # EDNAV structure to capture status of individuals at the start of the simulation + ednav = df_to_EAV(self.sim.population.props, self.sim.date, 'StartOfSimulation') + + logger.info(key='event_chains', + data = ednav.to_dict(), + description='Links forming chains of events for simulated individuals') + + + def on_notification_of_birth(self, data): + + if self.parameters['generate_event_chains']: + # When individual is born, store their initial properties to provide a starting point to the chain of property + # changes that this individual will undergo as a result of events taking place. + link_info = data['link_info'] + link_info.update(self.sim.population.props.loc[data['target']].to_dict()) + chain_links = {} + chain_links[data['target']] = link_info + + ednav = convert_chain_links_into_EAV(chain_links) + + logger.info(key='event_chains', + data = ednav.to_dict(), + description='Links forming chains of events for simulated individuals') + def on_notification_event_about_to_run(self, data): """Do this when notified that an event is about to run. This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ - print("This is the data I received ", data) - # Initialise these variables - self.print_chains = False - self.df_before = [] - self.row_before = pd.Series() - self.mni_instances_before = False - self.mni_row_before = {} - self.entire_mni_before = {} + p = self.parameters - print("My Modules") - print(self.sim.modules.keys()) - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if all(sub not in str(data['EventName']) for sub in self.sim.generate_event_chains_ignore_events): - - # Will eventually use this once I can actually GET THE NAME OF THE SELF - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + if p['generate_event_chains']: - self.print_chains = True + # Initialise these variables + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} - # Target is single individual - if not isinstance(data["target"], Population): + # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore + if all(sub not in str(data['link_info']['EventName']) for sub in p['events_to_ignore']): + + # Will eventually use this once I can actually GET THE NAME OF THE SELF + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): - # Save row for comparison after event has occurred - self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) + self.print_chains = True - # Check if individual is already in mni dictionary, if so copy her original status - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if data['target'] in mni: - self.mni_instances_before = True - self.mni_row_before = mni[data['target']].copy() - else: - self.mni_row_before = None - - else: + # Target is single individual + if not isinstance(data['target'], Population): - # This will be a population-wide event. In order to find individuals for which this led to - # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. - self.df_before = self.sim.population.props.copy() - if 'PregnancySupervisor' in self.sim.modules: - self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + # Save row for comparison after event has occurred + self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) + + # Check if individual is already in mni dictionary, if so copy her original status + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + self.mni_instances_before = True + self.mni_row_before = mni[data['target']].copy() + else: + self.mni_row_before = None + else: - self.entire_mni_before = None + + # This will be a population-wide event. In order to find individuals for which this led to + # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. + self.df_before = self.sim.population.props.copy() + if 'PregnancySupervisor' in self.sim.modules: + self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + self.entire_mni_before = None return def on_notification_event_has_just_ran(self, data): """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ - print("This is the data I received ", data) + + p = self.parameters - chain_links = {} - - # Target is single individual - if not isinstance(data["target"], Population): - - # Copy full new status for individual - row_after = self.sim.population.props.loc[abs(data['target'])].fillna(-99999) - - # Check if individual is in mni after the event - mni_instances_after = False - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if data['target'] in mni: - mni_instances_after = True - else: - mni_instances_after = None - - # Create and store event for this individual, regardless of whether any property change occurred - link_info = { - 'EventName' : data['EventName'], - } - - # Store (if any) property changes as a result of the event for this individual - for key in self.row_before.index: - if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe - link_info[key] = row_after[key] - - if 'PregnancySupervisor' in self.sim.modules: - # Now check and store changes in the mni dictionary, accounting for following cases: - # Individual is in mni dictionary before and after - if self.mni_instances_before and mni_instances_after: - for key in self.mni_row_before: - if self.mni_values_differ(mni_row_before[key], mni[data['target']][key]): - link_info[key] = mni[data['target']][key] - # Individual is only in mni dictionary before event - elif self.mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in self.mni_row_before: - if self.mni_values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not self.mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in default: - if self.mni_values_differ(default[key], mni[data['target']][key]): - link_info[key] = mni[data['target']][key] - # Else, no need to do anything - - # Add individual to the chain links - chain_links[data['target']] = link_info - - else: - # Target is entire population. Identify individuals for which properties have changed - # and store their changes. - - # Population frame after event - df_after = self.sim.population.props - if 'PregnancySupervisor' in self.sim.modules: - entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + if p['generate_event_chains'] and self.print_chains: + + chain_links = {} + + # Target is single individual + if not isinstance(data["target"], Population): + + # Copy full new status for individual + row_after = self.sim.population.props.loc[abs(data['target'])].fillna(-99999) + + # Check if individual is in mni after the event + mni_instances_after = False + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + mni_instances_after = True + else: + mni_instances_after = None + + # Create and store event for this individual, regardless of whether any property change occurred + link_info = data['link_info'] + + # Store (if any) property changes as a result of the event for this individual + for key in self.row_before.index: + if self.row_before[key] != row_after[key]: # Note: used fillna previously, so this is safe + link_info[key] = row_after[key] + + if 'PregnancySupervisor' in self.sim.modules: + # Now check and store changes in the mni dictionary, accounting for following cases: + # Individual is in mni dictionary before and after + if self.mni_instances_before and mni_instances_after: + for key in self.mni_row_before: + if self.mni_values_differ(self.mni_row_before[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + # Individual is only in mni dictionary before event + elif self.mni_instances_before and not mni_instances_after: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in self.mni_row_before: + if self.mni_values_differ(self.mni_row_before[key], default[key]): + link_info[key] = default[key] + # Individual is only in mni dictionary after event + elif mni_instances_after and not self.mni_instances_before: + default = self.sim.modules['PregnancySupervisor'].default_all_mni_values + for key in default: + if self.mni_values_differ(default[key], mni[data['target']][key]): + link_info[key] = mni[data['target']][key] + # Else, no need to do anything + + # Add individual to the chain links + chain_links[data['target']] = link_info + else: - entire_mni_after = None - - # Create and store the event and dictionary of changes for affected individuals - chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, self.entire_mni_before, entire_mni_after) - - if chain_links: - # Convert chain_links into EAV - ednav = convert_chain_links_into_EAV(chain_links) - - logger.info(key='event_chains', - data= ednav.to_dict(), - description='Links forming chains of events for simulated individuals') - - # Reset variables - self.print_chains = False - self.df_before = [] - self.row_before = pd.Series() - self.mni_instances_before = False - self.mni_row_before = {} - self.entire_mni_before = {} + # Target is entire population. Identify individuals for which properties have changed + # and store their changes. + + # Population frame after event + df_after = self.sim.population.props + if 'PregnancySupervisor' in self.sim.modules: + entire_mni_after = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + entire_mni_after = None + + # Create and store the event and dictionary of changes for affected individuals + chain_links = self.compare_population_dataframe_and_mni(self.df_before, df_after, self.entire_mni_before, entire_mni_after) + + if chain_links: + # Convert chain_links into EAV + ednav = convert_chain_links_into_EAV(chain_links) + + logger.info(key='event_chains', + data= ednav.to_dict(), + description='Links forming chains of events for simulated individuals') + + # Reset variables + self.print_chains = False + self.df_before = [] + self.row_before = pd.Series() + self.mni_instances_before = False + self.mni_row_before = {} + self.entire_mni_before = {} return diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index edb5d3df3b..01bd826f2d 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -7,28 +7,19 @@ from tlo import Date, logging from tlo.events import Event -from tlo.population import Population -from tlo.util import convert_chain_links_into_EAV -import pandas as pd +from tlo.notify import notifier if TYPE_CHECKING: from tlo import Module, Simulation from tlo.methods.healthsystem import HealthSystem -# Pointing to the logger in events -logger_chains = logging.getLogger("tlo.simulation") -logger_chains.setLevel(logging.INFO) - logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) logger_summary = logging.getLogger(f"{__name__}.summary") logger_summary.setLevel(logging.INFO) -debug_chains = True - - # Declare the level which will be used to represent the merging of levels '1b' and '2' LABEL_FOR_MERGED_FACILITY_LEVELS_1B_AND_2 = "2" @@ -204,144 +195,34 @@ def _run_after_hsi_event(self) -> None: item_codes=self._EQUIPMENT, facility_id=self.facility_info.id ) - - def values_differ(self, v1, v2): - - if isinstance(v1, list) and isinstance(v2, list): - return v1 != v2 # simple element-wise comparison - - if pd.isna(v1) and pd.isna(v2): - return False # treat both NaT/NaN as equal - return v1 != v2 - - - def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, dict, bool]: - """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ - - # Initialise these variables - print_chains = False - row_before = pd.Series() - mni_instances_before = False - mni_row_before = {} - - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - - # Will eventually use this once I can actually GET THE NAME OF THE SELF - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): - - print_chains = True - - # Target is single individual - if self.target != self.sim.population: - - # Save row for comparison after event has occurred - row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) - - # Check if individual is in mni dictionary before the event, if so store its original status - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_before = True - mni_row_before = mni[self.target].copy() - - else: - print("ERROR: there shouldn't be pop-wide HSI event") - exit(-1) - - return print_chains, row_before, mni_row_before, mni_instances_before - - def store_chains_to_do_after_event(self, row_before, footprint, mni_row_before, mni_instances_before) -> dict: - """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ - - # For HSI event, this will only ever occur for individual events - chain_links = {} - - row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) - - mni_instances_after = False - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if self.target in mni: - mni_instances_after = True - - # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level - # will be stored regardless of whether individual experienced property changes or not. - - # Add event details - try: - record_footprint = str(footprint) - record_level = self.facility_info.level - except: - record_footprint = 'N/A' - record_level = 'N/A' - - link_info = { - 'EventName' : type(self).__name__, - 'appt_footprint' : record_footprint, - 'level' : record_level, - } - - # Add changes to properties - for key in row_before.index: - if row_before[key] != row_after[key]: # Note: used fillna previously - link_info[key] = row_after[key] - - if 'PregnancySupervisor' in self.sim.modules: - # Now store changes in the mni dictionary, accounting for following cases: - # Individual is in mni dictionary before and after - if mni_instances_before and mni_instances_after: - for key in mni_row_before: - if self.values_differ(mni_row_before[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - # Individual is only in mni dictionary before event - elif mni_instances_before and not mni_instances_after: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in mni_row_before: - if self.values_differ(mni_row_before[key], default[key]): - link_info[key] = default[key] - # Individual is only in mni dictionary after event - elif mni_instances_after and not mni_instances_before: - default = self.sim.modules['PregnancySupervisor'].default_all_mni_values - for key in default: - if self.values_differ(default[key], mni[self.target][key]): - link_info[key] = mni[self.target][key] - - chain_links[self.target] = link_info - - return chain_links def run(self, squeeze_factor): """Make the event happen.""" - - if self.sim.generate_event_chains and self.target != self.sim.population: - print_chains, row_before, mni_row_before, mni_instances_before = self.store_chains_to_do_before_event() - - footprint = self.EXPECTED_APPT_FOOTPRINT + # Dispatch notification that HSI event is about to run + notifier.dispatch("event.about_to_run", data={"target": self.target, "link_info" : {"EventName": type(self).__name__}}) updated_appt_footprint = self.apply(self.target, squeeze_factor) self.post_apply_hook() self._run_after_hsi_event() - - - if self.sim.generate_event_chains and self.target != self.sim.population: - # If the footprint has been updated when the event ran, change it here - if updated_appt_footprint is not None: - footprint = updated_appt_footprint - - if print_chains: - chain_links = self.store_chains_to_do_after_event(row_before, str(footprint), mni_row_before, mni_instances_before) + # Dispatch notification that HSI event has just ran + if updated_appt_footprint is not None: + footprint = updated_appt_footprint + else: + footprint = self.EXPECTED_APPT_FOOTPRINT + try: + level = self.facility_info.level + except: + level = "N/A" - if chain_links: - - # Convert chain_links into EAV - ednav = convert_chain_links_into_EAV(chain_links) - logger_chain.info(key='event_chains', - data = ednav, - description='Links forming chains of events for simulated individuals') + notifier.dispatch("event.has_just_ran", + data={"target": self.target, + "link_info" : {"EventName": type(self).__name__, + "footprint": footprint, + "level": level + }}) return updated_appt_footprint diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index eac1bbdc89..17016f5fc7 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -8,16 +8,10 @@ import time from collections import Counter, OrderedDict from pathlib import Path -from typing import Optional from typing import TYPE_CHECKING, Optional -import pandas as pd -import tlo.population import numpy as np -import tlo.methods.collect_event_chains from tlo.notify import notifier -from tlo.methods.collect_event_chains import CollectEventChains -from tlo.util import df_to_EAV, convert_chain_links_into_EAV try: import dill @@ -42,9 +36,6 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -logger_chains = logging.getLogger("tlo.methods.event") -logger_chains.setLevel(logging.INFO) - class SimulationPreviouslyInitialisedError(Exception): """Exception raised when trying to initialise an already initialised simulation.""" @@ -113,16 +104,9 @@ def __init__( self.date = self.start_date = start_date self.modules = OrderedDict() self.event_queue = EventQueue() - - self.generate_event_chains = True - self.generate_event_chains_modules_of_interest = [] - self.generate_event_chains_ignore_events = [] - self.end_date = None self.output_file = None self.population: Optional[Population] = None - - self.show_progress_bar = show_progress_bar self.resourcefilepath = Path(resourcefilepath) @@ -152,7 +136,6 @@ def __init__( # Whether simulation has been initialised self._initialised = False - def _configure_logging( self, @@ -299,21 +282,13 @@ def make_initial_population(self, *, n: int) -> None: key="debug", data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - - # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. - # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. - if self.generate_event_chains: - - # EDNAV structure to capture status of individuals at the start of the simulation - ednav = df_to_EAV(self.population.props, self.date, 'StartOfSimulation') - - logger.info(key='event_chains', - data = ednav.to_dict(), - description='Links forming chains of events for simulated individuals') + + # Dispatch notification that pop has been initialised + notifier.dispatch("simulation.pop_has_been_initialised", data={}) end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") - + def initialise(self, *, end_date: Date) -> None: """Initialise all modules in simulation. :param end_date: Date to end simulation on - accessible to modules to allow @@ -326,15 +301,6 @@ def initialise(self, *, end_date: Date) -> None: self.date = self.start_date self.end_date = end_date # store the end_date so that others can reference it - #self.generate_event_chains = generate_event_chains - if self.generate_event_chains: - # For now keep these fixed, eventually they will be input from user - self.generate_event_chains_modules_of_interest = [self.modules] - self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'LifestyleEvent', 'TbActiveCasePollGenerateData','HivPollingEventForDataGeneration', 'RTIPollingEvent'] - - # Reorder columns to place the new columns at the front - pd.set_option('display.max_columns', None) - for module in self.modules.values(): module.initialise_simulation(self) self._initialised = True @@ -403,8 +369,6 @@ def run_simulation_to(self, *, to_date: Date) -> None: :param to_date: Date to simulate up to but not including - must be before or equal to simulation end date specified in call to :py:meth:`initialise`. """ - open('output.txt', mode='a') - if not self._initialised: msg = "Simulation must be initialised before calling run_simulation_to" raise SimulationNotInitialisedError(msg) @@ -463,7 +427,6 @@ def fire_single_event(self, event: Event, date: Date) -> None: """ self.date = date event.run() - def do_birth(self, mother_id: int) -> int: """Create a new child person. @@ -478,22 +441,12 @@ def do_birth(self, mother_id: int) -> int: for module in self.modules.values(): module.on_birth(mother_id, child_id) - if self.generate_event_chains: - # When individual is born, store their initial properties to provide a starting point to the chain of property - # changes that this individual will undergo as a result of events taking place. - link_info = self.population.props.loc[child_id].to_dict() - link_info['EventName'] = 'Birth' - chain_links = {} - chain_links[child_id] = link_info # Convert to string to avoid issue of length - - ednav = convert_chain_links_into_EAV(chain_links) - - logger.info(key='event_chains', - data = ednav.to_dict(), - description='Links forming chains of events for simulated individuals') + # Dispatch notification that birth is about to occur + notifier.dispatch("simulation.on_birth", data={'target': child_id, 'link_info' : {'EventName': 'Birth'}}) return child_id + def find_events_for_person(self, person_id: int) -> list[tuple[Date, Event]]: """Find the events in the queue for a particular person. From e617aa9a1885a260c28dfc47db5c72cac09fdcdd Mon Sep 17 00:00:00 2001 From: Asif Tamuri Date: Tue, 25 Nov 2025 13:39:35 +0000 Subject: [PATCH 45/54] Clear listeners in the global notifier instance at the start of simulation --- src/tlo/notify.py | 7 +++++++ src/tlo/simulation.py | 3 +++ 2 files changed, 10 insertions(+) diff --git a/src/tlo/notify.py b/src/tlo/notify.py index 48c46b82b4..b1b4434ba9 100644 --- a/src/tlo/notify.py +++ b/src/tlo/notify.py @@ -60,6 +60,13 @@ def dispatch(self, notification_key, data=None): for listener in self.listeners[notification_key]: listener(data) + def clear_listeners(self): + """ + Clear all registered listeners. Essential because the notifier is a global singleton. + e.g. if you are running multiple tests or simulations in the same process. + """ + self.listeners.clear() + # Create a global notifier instance notifier = Notifier() diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index d2560f92d9..b0bd733234 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -26,6 +26,7 @@ topologically_sort_modules, ) from tlo.events import Event, IndividualScopeEventMixin +from tlo.notify import notifier from tlo.progressbar import ProgressBar if TYPE_CHECKING: @@ -116,6 +117,8 @@ def __init__( self._custom_log_levels = None self._log_filepath = self._configure_logging(**log_config) + # clear notifier listeners from any previous simulation in this process + notifier.clear_listeners() # random number generator seed_from = "auto" if seed is None else "user" From 4fe8e1f11d9e7fa142735290e3d2f249d73c90d3 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 14:03:16 +0000 Subject: [PATCH 46/54] Correct log name in analysis file --- .../ResourceFile_GenerateEventChains/parameter_values.csv | 4 ++-- src/tlo/analysis/utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/resources/ResourceFile_GenerateEventChains/parameter_values.csv b/resources/ResourceFile_GenerateEventChains/parameter_values.csv index 82394e590b..2fa792a63a 100644 --- a/resources/ResourceFile_GenerateEventChains/parameter_values.csv +++ b/resources/ResourceFile_GenerateEventChains/parameter_values.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f68e30f87dbe757b98cea2658c8f0c40cab629c4b6825a012ce90e12a27bc612 -size 102 +oid sha256:fbb13b64e27d6ba81ff4bc779e4c50f0dda63b784ec7cc1790ddb64777fb9f35 +size 242 diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index fc0d374fd1..94bc541d30 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -440,7 +440,7 @@ def extract_event_chains(results_folder: Path, """Utility function to collect chains of events. Individuals across runs of the same draw will be combined into unique df. Returns dictionary where keys are draws, and each draw is associated with a dataframe of format 'E', 'EventDate', 'EventName', 'Info' where 'Info' is a dictionary that combines A&Vs for a particular individual + date + event name combination. """ - module = 'tlo.simulation' + module = 'tlo.collect_event_chains' key = 'event_chains' # get number of draws and numbers of runs From c1e60969688f50bfef1aabde122fdffe2dc6f151 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 14:34:45 +0000 Subject: [PATCH 47/54] Summarise checks on whether to collect event changes --- src/tlo/events.py | 3 +- src/tlo/methods/collect_event_chains.py | 58 ++++++++++++------------- src/tlo/methods/hsi_event.py | 2 +- 3 files changed, 30 insertions(+), 33 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index dba2f33cd5..e79074b333 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -65,9 +65,8 @@ def apply(self, target): def run(self): """Make the event happen.""" - # Dispatch notification that event is about to run - notifier.dispatch("event.about_to_run", data={"target": self.target, "link_info" : {"EventName": type(self).__name__}}) + notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module, "link_info" : {"EventName": type(self).__name__}}) self.apply(self.target) self.post_apply_hook() diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 7fb946c524..4ce38b43f8 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -50,7 +50,7 @@ def read_parameters(self, resourcefilepath: Optional[Path] = None): #print("resource file path", resourcefilepath) #self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) self.parameters["generate_event_chains"] = True - self.parameters["modules_of_interest"] = self.sim.modules + self.parameters["modules_of_interest"] = self.sim.modules.values() self.parameters["events_to_ignore"] =["AgeUpdateEvent","HealthSystemScheduler","SimplifiedBirthsPoll","DirectBirth","LifestyleEvent","TbActiveCasePollGenerateData","HivPollingEventForDataGeneration","RTIPollingEvent"] @@ -96,9 +96,13 @@ def on_notification_event_about_to_run(self, data): """Do this when notified that an event is about to run. This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ p = self.parameters - - if p['generate_event_chains']: + # Only log event if + # 1) generate_event_chains is set to True + # 2) the event belongs to modules of interest and + # 3) the event is not in the list of events to ignore + if p['generate_event_chains'] and (data['module'] in p['modules_of_interest']) and (data['link_info']['EventName'] not in p['events_to_ignore']): + # Initialise these variables self.print_chains = False self.df_before = [] @@ -107,38 +111,32 @@ def on_notification_event_about_to_run(self, data): self.mni_row_before = {} self.entire_mni_before = {} - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if all(sub not in str(data['link_info']['EventName']) for sub in p['events_to_ignore']): + self.print_chains = True - # Will eventually use this once I can actually GET THE NAME OF THE SELF - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + # Target is single individual + if not isinstance(data['target'], Population): - self.print_chains = True + # Save row for comparison after event has occurred + self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) - # Target is single individual - if not isinstance(data['target'], Population): - - # Save row for comparison after event has occurred - self.row_before = self.sim.population.props.loc[abs(data['target'])].copy().fillna(-99999) - - # Check if individual is already in mni dictionary, if so copy her original status - if 'PregnancySupervisor' in self.sim.modules: - mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info - if data['target'] in mni: - self.mni_instances_before = True - self.mni_row_before = mni[data['target']].copy() - else: - self.mni_row_before = None - + # Check if individual is already in mni dictionary, if so copy her original status + if 'PregnancySupervisor' in self.sim.modules: + mni = self.sim.modules['PregnancySupervisor'].mother_and_newborn_info + if data['target'] in mni: + self.mni_instances_before = True + self.mni_row_before = mni[data['target']].copy() else: + self.mni_row_before = None + + else: - # This will be a population-wide event. In order to find individuals for which this led to - # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. - self.df_before = self.sim.population.props.copy() - if 'PregnancySupervisor' in self.sim.modules: - self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) - else: - self.entire_mni_before = None + # This will be a population-wide event. In order to find individuals for which this led to + # a meaningful change, make a copy of the while pop dataframe/mni before the event has occurred. + self.df_before = self.sim.population.props.copy() + if 'PregnancySupervisor' in self.sim.modules: + self.entire_mni_before = copy.deepcopy(self.sim.modules['PregnancySupervisor'].mother_and_newborn_info) + else: + self.entire_mni_before = None return diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 01bd826f2d..edc7ffb721 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -201,7 +201,7 @@ def run(self, squeeze_factor): """Make the event happen.""" # Dispatch notification that HSI event is about to run - notifier.dispatch("event.about_to_run", data={"target": self.target, "link_info" : {"EventName": type(self).__name__}}) + notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module, "link_info" : {"EventName": type(self).__name__}}) updated_appt_footprint = self.apply(self.target, squeeze_factor) self.post_apply_hook() From 5e0720496b1a2572aee6e0b4bb30740152c26bc5 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 15:02:14 +0000 Subject: [PATCH 48/54] Use module names rather than obj for ease of use --- src/tlo/events.py | 2 +- src/tlo/methods/collect_event_chains.py | 2 +- src/tlo/methods/hsi_event.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index e79074b333..299fffa6ed 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -66,7 +66,7 @@ def run(self): """Make the event happen.""" # Dispatch notification that event is about to run - notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module, "link_info" : {"EventName": type(self).__name__}}) + notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module.name, "link_info" : {"EventName": type(self).__name__}}) self.apply(self.target) self.post_apply_hook() diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 4ce38b43f8..ce480aa97e 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -50,7 +50,7 @@ def read_parameters(self, resourcefilepath: Optional[Path] = None): #print("resource file path", resourcefilepath) #self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) self.parameters["generate_event_chains"] = True - self.parameters["modules_of_interest"] = self.sim.modules.values() + self.parameters["modules_of_interest"] = self.sim.modules.keys() self.parameters["events_to_ignore"] =["AgeUpdateEvent","HealthSystemScheduler","SimplifiedBirthsPoll","DirectBirth","LifestyleEvent","TbActiveCasePollGenerateData","HivPollingEventForDataGeneration","RTIPollingEvent"] diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index edc7ffb721..32620f6c28 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -201,7 +201,7 @@ def run(self, squeeze_factor): """Make the event happen.""" # Dispatch notification that HSI event is about to run - notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module, "link_info" : {"EventName": type(self).__name__}}) + notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module.name, "link_info" : {"EventName": type(self).__name__}}) updated_appt_footprint = self.apply(self.target, squeeze_factor) self.post_apply_hook() From 2ce9bbd9a79c83ffee33294a6c646da717d5fd30 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 15:24:10 +0000 Subject: [PATCH 49/54] Fix parameters initialisation --- .../parameter_values.csv | 4 ++-- src/tlo/methods/collect_event_chains.py | 11 +++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/resources/ResourceFile_GenerateEventChains/parameter_values.csv b/resources/ResourceFile_GenerateEventChains/parameter_values.csv index 2fa792a63a..a84c77ab60 100644 --- a/resources/ResourceFile_GenerateEventChains/parameter_values.csv +++ b/resources/ResourceFile_GenerateEventChains/parameter_values.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fbb13b64e27d6ba81ff4bc779e4c50f0dda63b784ec7cc1790ddb64777fb9f35 -size 242 +oid sha256:788332259649a19889355820b194fe0d16af44436f208e3a948e833f0ad5139a +size 419 diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index ce480aa97e..7f903e2035 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -47,13 +47,12 @@ def initialise_simulation(self, sim): notifier.add_listener("event.has_just_ran", self.on_notification_event_has_just_ran) def read_parameters(self, resourcefilepath: Optional[Path] = None): - #print("resource file path", resourcefilepath) - #self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) - self.parameters["generate_event_chains"] = True - self.parameters["modules_of_interest"] = self.sim.modules.keys() - - self.parameters["events_to_ignore"] =["AgeUpdateEvent","HealthSystemScheduler","SimplifiedBirthsPoll","DirectBirth","LifestyleEvent","TbActiveCasePollGenerateData","HivPollingEventForDataGeneration","RTIPollingEvent"] + print("resource file path", resourcefilepath) + self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) + # If modules of interest is '*', set by default to all modules included in the simulation + if self.parameters["modules_of_interest"] == ['*']: + self.parameters["modules_of_interest"] = self.sim.modules.keys() def initialise_population(self, population): pass From a786b2e1cc9c3932a90fa2f3b9c01b556e5d31d7 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 15:29:43 +0000 Subject: [PATCH 50/54] Fix to type of parameter --- src/tlo/methods/collect_event_chains.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 7f903e2035..f1f36224a6 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -52,7 +52,7 @@ def read_parameters(self, resourcefilepath: Optional[Path] = None): # If modules of interest is '*', set by default to all modules included in the simulation if self.parameters["modules_of_interest"] == ['*']: - self.parameters["modules_of_interest"] = self.sim.modules.keys() + self.parameters["modules_of_interest"] = list(self.sim.modules.keys()) def initialise_population(self, population): pass From 7af8c70fbb5f367619d98741d38f7f6e7954a926 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 16:35:54 +0000 Subject: [PATCH 51/54] Give option to overwrite module parameters --- src/tlo/methods/collect_event_chains.py | 37 +++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index f1f36224a6..ef5f04d639 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -1,7 +1,7 @@ from tlo.notify import notifier from pathlib import Path -from typing import Optional +from typing import Optional, List from tlo import Module, Parameter, Types, logging, population from tlo.population import Population import pandas as pd @@ -15,8 +15,19 @@ class CollectEventChains(Module): - def __init__(self, name=None): + def __init__( + self, + name: Optional[str] = None, + generate_event_chains: Optional[bool] = None, + modules_of_interest: Optional[List[str]] = None, + events_to_ignore: Optional[List[str]] = None + + ): super().__init__(name) + + self.generate_event_chains = generate_event_chains + self.modules_of_interest = modules_of_interest + self.events_to_ignore = events_to_ignore # This is how I am passing data from fnc taking place before event to the one after # It doesn't seem very elegant but not sure how else to go about it @@ -57,6 +68,28 @@ def read_parameters(self, resourcefilepath: Optional[Path] = None): def initialise_population(self, population): pass + def initialise_simulation(self, sim): + # Use parameter file values by default, if not overwritten + self.generate_event_chains = self.parameters['generate_event_chains'] \ + if self.generate_event_chains is None \ + else self.generate_event_chains + + self.modules_of_interest = self.parameters['modules_of_interest'] \ + if self.modules_of_interest is None \ + else self.modules_of_interest + + self.events_to_ignore = self.parameters['events_to_ignore'] \ + if self.events_to_ignore is None \ + else self.events_to_ignore + + def get_generate_event_chains(self) -> bool: + """Returns `generate_event_chains`. (Should be equal to what is specified by the parameter, but + overwrite with what was provided in argument if an argument was specified -- provided for backward + compatibility/debugging.)""" + return self.parameters['generate_event_chains'] \ + if self.arg_generate_event_chains is None \ + else self.arg_generate_event_chains + def on_birth(self, mother, child): # Could the notification of birth simply take place here? pass From d8e6922a94eb5ecd398d20c886d9678dad3fb72f Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 16:40:42 +0000 Subject: [PATCH 52/54] Correct use of parameters --- src/tlo/methods/collect_event_chains.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index ef5f04d639..0dca32dec1 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -127,13 +127,11 @@ def on_notification_of_birth(self, data): def on_notification_event_about_to_run(self, data): """Do this when notified that an event is about to run. This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ - p = self.parameters - # Only log event if # 1) generate_event_chains is set to True # 2) the event belongs to modules of interest and # 3) the event is not in the list of events to ignore - if p['generate_event_chains'] and (data['module'] in p['modules_of_interest']) and (data['link_info']['EventName'] not in p['events_to_ignore']): + if self.generate_event_chains and (data['module'] in self.modules_of_interest) and (data['link_info']['EventName'] not in self.events_to_ignore): # Initialise these variables self.print_chains = False @@ -175,10 +173,8 @@ def on_notification_event_about_to_run(self, data): def on_notification_event_has_just_ran(self, data): """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ - - p = self.parameters - if p['generate_event_chains'] and self.print_chains: + if self.print_chains: chain_links = {} From fd761f77511513e18875bedaf3b6ab3100eb8170 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 16:45:45 +0000 Subject: [PATCH 53/54] Exit as soon as condition is not met --- src/tlo/methods/collect_event_chains.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index 0dca32dec1..b3eb42a8b3 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -131,7 +131,9 @@ def on_notification_event_about_to_run(self, data): # 1) generate_event_chains is set to True # 2) the event belongs to modules of interest and # 3) the event is not in the list of events to ignore - if self.generate_event_chains and (data['module'] in self.modules_of_interest) and (data['link_info']['EventName'] not in self.events_to_ignore): + if not self.generate_event_chains or (data['module'] not in self.modules_of_interest) or (data['link_info']['EventName'] in self.events_to_ignore): + return + else: # Initialise these variables self.print_chains = False @@ -174,7 +176,9 @@ def on_notification_event_about_to_run(self, data): def on_notification_event_has_just_ran(self, data): """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ - if self.print_chains: + if not self.print_chains: + return + else: chain_links = {} From edd9e0b8a3599b28ed91b87aa568180971aa9643 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 25 Nov 2025 18:07:30 +0000 Subject: [PATCH 54/54] Allow option to overwrite parameter file --- .../parameter_values.csv | 4 +- .../scenario_generate_chains.py | 81 ++----------------- src/tlo/events.py | 3 +- src/tlo/methods/collect_event_chains.py | 13 +-- 4 files changed, 12 insertions(+), 89 deletions(-) diff --git a/resources/ResourceFile_GenerateEventChains/parameter_values.csv b/resources/ResourceFile_GenerateEventChains/parameter_values.csv index a84c77ab60..ebf20c5f79 100644 --- a/resources/ResourceFile_GenerateEventChains/parameter_values.csv +++ b/resources/ResourceFile_GenerateEventChains/parameter_values.csv @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:788332259649a19889355820b194fe0d16af44436f208e3a948e833f0ad5139a -size 419 +oid sha256:172a0c24c859aaafbad29f6016433cac7a7324efc582e6c4b19c74b6b97436e7 +size 420 diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 6cfbd040fa..0f53a1461b 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -48,7 +48,7 @@ wasting, ) -class GenerateDataChains(BaseScenario): +class GenerateEventChains(BaseScenario): def __init__(self): super().__init__() self.seed = 42 @@ -71,101 +71,30 @@ def log_configuration(self): 'tlo.methods.demography.detail': logging.WARNING, 'tlo.methods.healthburden': logging.INFO, 'tlo.methods.healthsystem.summary': logging.INFO, + 'tlo.methods.collect_event_chains': logging.INFO } } def modules(self): - # MODIFY - # Here instead of running full module - """ - return [demography.Demography(resourcefilepath=self.resources), - enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources), - healthburden.HealthBurden(resourcefilepath=self.resources), - symptommanager.SymptomManager(resourcefilepath=self.resources, spurious_symptoms=False),#, - #rti.RTI(resourcefilepath=self.resources), - pregnancy_supervisor.PregnancySupervisor(resourcefilepath=self.resources), - labour.Labour(resourcefilepath=self.resources), - care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=self.resources), - contraception.Contraception(resourcefilepath=self.resources), - newborn_outcomes.NewbornOutcomes(resourcefilepath=self.resources), - postnatal_supervisor.PostnatalSupervisor(resourcefilepath=self.resources), - hiv.Hiv(resourcefilepath=self.resources), - tb.Tb(resourcefilepath=self.resources), - epi.Epi(resourcefilepath=self.resources), - healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources), - #simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), - healthsystem.HealthSystem(resourcefilepath=self.resources, - mode_appt_constraints=1, - cons_availability='all')] - """ return ( - fullmodel(resourcefilepath=self.resources) - + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + fullmodel() ) - """ - def draw_parameters(self, draw_number, rng): - return mix_scenarios( - get_parameters_for_status_quo(), - { - 'HealthSystem': { - 'Service_Availability': list(self._scenarios.values())[draw_number], - }, - } - ) - - def _get_scenarios(self) -> Dict[str, list[str]]: - Return the Dict with values for the parameter `Service_Availability` keyed by a name for the scenario. - The sequences of scenarios systematically omits one of the TREATMENT_ID's that is defined in the model. - # Generate list of TREATMENT_IDs and filter to the resolution needed - treatments = get_filtered_treatment_ids(depth=2) - treatments_RTI = [item for item in treatments if 'Rti' in item] - - # Return 'Service_Availability' values, with scenarios for everything, nothing, and ones for which each - # treatment is omitted - service_availability = dict({"Everything": ["*", "Nothing": []}) - #service_availability.update( - # {f"No {t.replace('_*', '*')}": [x for x in treatments if x != t] for t in treatments_RTI} - #) - - return service_availability - - """ def draw_parameters(self, draw_number, rng): if draw_number < self.number_of_draws: return list(self._scenarios.values())[draw_number] else: return - # case 1: gfHE = -0.030, factor = 1.01074 - # case 2: gfHE = -0.020, factor = 1.02116 - # case 3: gfHE = -0.015, factor = 1.02637 - # case 4: gfHE = 0.015, factor = 1.05763 - # case 5: gfHE = 0.020, factor = 1.06284 - # case 6: gfHE = 0.030, factor = 1.07326 - def _get_scenarios(self) -> Dict[str, Dict]: - #Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. - - treatments = get_filtered_treatment_ids(depth=2) - treatments_RTI = [item for item in treatments if 'Rti' in item] - - # Return 'Service_Availability' values, with scenarios for everything, nothing, and ones for which each - # treatment is omitted - service_availability = dict({"Everything": ["*"], "Nothing": []}) - service_availability.update( - {f"No {t.replace('_*', '*')}": [x for x in treatments if x != t] for t in treatments_RTI} - ) - print(service_availability.keys()) return { - # =========== STATUS QUO ============ "Baseline": mix_scenarios( self._baseline(), { - "HealthSystem": { - "Service_Availability": service_availability["No Rti_ShockTreatment*"], + "CollectEventChains": { + "generate_event_chains": True, }, } ), diff --git a/src/tlo/events.py b/src/tlo/events.py index 299fffa6ed..56acb82f43 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -60,11 +60,10 @@ def apply(self, target): :param target: the target of the event """ raise NotImplementedError - def run(self): """Make the event happen.""" - + # Dispatch notification that event is about to run notifier.dispatch("event.about_to_run", data={"target": self.target, "module" : self.module.name, "link_info" : {"EventName": type(self).__name__}}) diff --git a/src/tlo/methods/collect_event_chains.py b/src/tlo/methods/collect_event_chains.py index b3eb42a8b3..712d8c045e 100644 --- a/src/tlo/methods/collect_event_chains.py +++ b/src/tlo/methods/collect_event_chains.py @@ -58,17 +58,9 @@ def initialise_simulation(self, sim): notifier.add_listener("event.has_just_ran", self.on_notification_event_has_just_ran) def read_parameters(self, resourcefilepath: Optional[Path] = None): - print("resource file path", resourcefilepath) self.load_parameters_from_dataframe(pd.read_csv(resourcefilepath/"ResourceFile_GenerateEventChains/parameter_values.csv")) - - # If modules of interest is '*', set by default to all modules included in the simulation - if self.parameters["modules_of_interest"] == ['*']: - self.parameters["modules_of_interest"] = list(self.sim.modules.keys()) def initialise_population(self, population): - pass - - def initialise_simulation(self, sim): # Use parameter file values by default, if not overwritten self.generate_event_chains = self.parameters['generate_event_chains'] \ if self.generate_event_chains is None \ @@ -81,6 +73,10 @@ def initialise_simulation(self, sim): self.events_to_ignore = self.parameters['events_to_ignore'] \ if self.events_to_ignore is None \ else self.events_to_ignore + + # If modules of interest is '*', set by default to all modules included in the simulation + if self.modules_of_interest == ['*']: + self.modules_of_interest = list(self.sim.modules.keys()) def get_generate_event_chains(self) -> bool: """Returns `generate_event_chains`. (Should be equal to what is specified by the parameter, but @@ -134,7 +130,6 @@ def on_notification_event_about_to_run(self, data): if not self.generate_event_chains or (data['module'] not in self.modules_of_interest) or (data['link_info']['EventName'] in self.events_to_ignore): return else: - # Initialise these variables self.print_chains = False self.df_before = []