Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
dbff470
Investigate analysis of events at sim level
marghe-molaro Apr 3, 2024
bf64628
Merge branch 'master' into molaro/harvest-training-data
marghe-molaro Sep 17, 2024
05098f7
Final data-printing set-up
marghe-molaro Sep 30, 2024
16c071c
Print event chains
marghe-molaro Oct 2, 2024
ba81487
Add chains in mode 2 too and clean up in simuation
marghe-molaro Oct 2, 2024
0474624
Merged with master, and moved all logging into event module to keep t…
marghe-molaro Oct 2, 2024
b1c907c
Fix issue with tests by ensuring standard Polling and infection is ma…
marghe-molaro Oct 7, 2024
cfb4264
Switch iloc for loc
marghe-molaro Oct 7, 2024
e0327de
Change syntax of if statement
marghe-molaro Oct 7, 2024
fceee02
Change syntax of if statement and print string of event
marghe-molaro Oct 9, 2024
eaeae62
Focus on rti and print footprint
marghe-molaro Oct 10, 2024
c7bd9d0
Only store change in individual properties, not entire property row. …
marghe-molaro Oct 11, 2024
769aaec
Style fixes
marghe-molaro Oct 11, 2024
757cee3
Include printing of individual properties at the beginning and at bir…
marghe-molaro Oct 13, 2024
22a5e44
Log everything to simulation, as events logger doesn't seem to be vis…
marghe-molaro Oct 16, 2024
7faa817
Consider all modules included as of interest
marghe-molaro Oct 18, 2024
7232f97
Remove pop-wide HSI warning and make epi default even when printing c…
marghe-molaro Oct 18, 2024
98a8832
Merge branch 'master' into molaro/harvest-training-data
marghe-molaro Oct 18, 2024
a6def2d
Style fix
marghe-molaro Oct 18, 2024
ecea532
Remove data generation test, which wasn't really a test
marghe-molaro Oct 18, 2024
ae7a44c
Change dict of properties to string in logging, and add analysis files
marghe-molaro Oct 23, 2024
16299a2
Include debugging option, final set-up of scenario to print data, ana…
marghe-molaro Nov 25, 2024
0dd862f
Change label of person when iterating
marghe-molaro Nov 26, 2024
0e7dc99
Merge branch 'master' into molaro/harvest-training-data
marghe-molaro Dec 9, 2024
84f8263
Correctly retrieve event name
marghe-molaro Dec 13, 2024
a490d19
Modify scenario file such that can exclude specific services, and cor…
marghe-molaro Jan 20, 2025
08a5d9a
Change seed in scenario file
marghe-molaro Apr 12, 2025
3dda343
latest scenario
marghe-molaro Apr 14, 2025
d9e3f66
Latest scenario version
marghe-molaro Apr 29, 2025
ddf6f68
Latest version of scenario file
marghe-molaro Apr 29, 2025
0e38408
Ensure changes to mni dataframe are captured as well
marghe-molaro Oct 9, 2025
9b8f01f
Tidy up
marghe-molaro Oct 9, 2025
3b81de6
All fixes made
marghe-molaro Oct 9, 2025
bc61e1e
Cleaned and [skip ci]
marghe-molaro Nov 17, 2025
e084e39
Start logging data in EAV format
marghe-molaro Nov 20, 2025
ac617e8
Log event chains via EAV approach
marghe-molaro Nov 21, 2025
5234550
No need to store EventDate since this is already stored in logger by …
marghe-molaro Nov 21, 2025
b3e6ccc
Merge master
marghe-molaro Nov 21, 2025
2f20cb3
Check if PregnancySupervisor is included before considering in chain …
marghe-molaro Nov 21, 2025
1b83823
Remove old util fnc
marghe-molaro Nov 22, 2025
f4cf120
Overwrite any changes to hiv and tb file
marghe-molaro Nov 22, 2025
29dd543
Overwrite any changes to demography file
marghe-molaro Nov 22, 2025
33f1143
Remove outdated test related to RTI data harvesting
marghe-molaro Nov 22, 2025
af477c2
Add a very simple synchronous notification dispatcher
tamuri Nov 24, 2025
01e35d0
Fix comment
tamuri Nov 24, 2025
9f23fcb
Fix formatting
tamuri Nov 24, 2025
5ff53bb
Remove unnecessary list wrap
tamuri Nov 24, 2025
02278b3
Merge branch 'tamuri/signal' into molaro/harvest-training-data-includ…
marghe-molaro Nov 24, 2025
16f5e67
Use broadcaster to collect events. Need to expand to include HSI events
marghe-molaro Nov 24, 2025
ebe0ebc
Use broadcasting in HSI events too
marghe-molaro Nov 25, 2025
e617aa9
Clear listeners in the global notifier instance at the start of simul…
tamuri Nov 25, 2025
4fe8e1f
Correct log name in analysis file
marghe-molaro Nov 25, 2025
474a1e5
Merge branch 'tamuri/signal' into molaro/harvest-training-data-includ…
marghe-molaro Nov 25, 2025
c1e6096
Summarise checks on whether to collect event changes
marghe-molaro Nov 25, 2025
5e07204
Use module names rather than obj for ease of use
marghe-molaro Nov 25, 2025
2ce9bbd
Fix parameters initialisation
marghe-molaro Nov 25, 2025
a786b2e
Fix to type of parameter
marghe-molaro Nov 25, 2025
7af8c70
Give option to overwrite module parameters
marghe-molaro Nov 25, 2025
d8e6922
Correct use of parameters
marghe-molaro Nov 25, 2025
fd761f7
Exit as soon as condition is not met
marghe-molaro Nov 25, 2025
edd9e0b
Allow option to overwrite parameter file
marghe-molaro Nov 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Git LFS file not shown
557 changes: 557 additions & 0 deletions src/scripts/analysis_data_generation/analysis_extract_data.py

Large diffs are not rendered by default.

156 changes: 156 additions & 0 deletions src/scripts/analysis_data_generation/postprocess_events_chain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import pandas as pd
from dateutil.relativedelta import relativedelta

def cut_off_events_after_death(df):
    """Remove from every individual's event chain all events fired after death.

    Rows are grouped by ``person_ID``. Within each group the first row whose
    ``date_of_death`` is not NaN marks the death: rows up to and including
    that row are kept and all later rows are discarded. Groups in which
    ``date_of_death`` is never set are kept unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Event chains; must contain ``person_ID`` and ``date_of_death`` columns.

    Returns
    -------
    pandas.DataFrame
        The retained rows, concatenated group by group, keeping their
        original index labels. When ``df`` has no rows, an empty frame with
        the same columns as ``df`` is returned so that callers can still
        group or select by column name.
    """
    kept_groups = []

    for _, group in df.groupby('person_ID'):
        # Index label of the first row on which a death date is recorded (None if never)
        death_label = group['date_of_death'].first_valid_index()

        if death_label is None:
            kept_groups.append(group)
        else:
            # .loc slicing is label-based and inclusive, so the death row itself is kept
            kept_groups.append(group.loc[:death_label])

    if not kept_groups:
        # pd.concat raises on an empty list; preserve the column layout instead
        return df.iloc[0:0]

    # Concatenate once at the end rather than growing a frame inside the loop
    return pd.concat(kept_groups)

def load_csv_to_dataframe(file_path):
    """Load the raw event chains stored in a CSV file into a DataFrame.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame or None
        The loaded table. ``None`` is returned when the file does not exist
        or any other error occurs while loading; in both cases a message
        describing the problem is printed instead of raising.
    """
    try:
        raw_chains = pd.read_csv(file_path)
        print("Raw event chains loaded successfully!")
    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
        return None
    except Exception as error:
        print(f"An error occurred: {error}")
        return None
    return raw_chains

# ---------------------------------------------------------------------------
# Script set-up: load the raw event chains, normalise the date columns, run
# sanity checks, drop post-death events and define the reporting options used
# by the per-individual loop further down.
# ---------------------------------------------------------------------------
file_path = 'output.csv'  # Replace with the path to your CSV file

output = load_csv_to_dataframe(file_path)
if output is None:
    # The loader has already printed what went wrong and returned None; stop
    # here instead of failing on the next line with an unrelated TypeError.
    raise SystemExit(-1)

# Some of the dates appeared not to be in datetime format. Correct here.
# errors='coerce' turns unparseable entries into NaT rather than raising.
output['date_of_death'] = pd.to_datetime(output['date_of_death'], errors='coerce')
output['date_of_birth'] = pd.to_datetime(output['date_of_birth'], errors='coerce')
if 'hv_date_inf' in output.columns:
    output['hv_date_inf'] = pd.to_datetime(output['hv_date_inf'], errors='coerce')


date_start = pd.to_datetime('2010-01-01')
if 'Other' in output['cause_of_death'].values:
    print("ERROR: 'Other' was included in sim as possible cause of death")
    # `exit` is a helper injected by the `site` module for interactive use;
    # raising SystemExit directly has the same effect and is always available.
    raise SystemExit(-1)

# Choose which columns in individual properties to visualise
columns_to_print = ['event', 'is_alive', 'hv_inf', 'hv_art', 'tb_inf', 'tb_date_active', 'event_date', 'when']
# Alternative selection:
# columns_to_print = ['person_ID', 'date_of_birth', 'date_of_death', 'cause_of_death', 'hv_date_inf', 'hv_art',
#                     'tb_inf', 'tb_date_active', 'event date', 'event']

# When checking which individuals led to *any* changes in individual properties, exclude these columns from comparison
columns_to_exclude_in_comparison = [
    'when', 'event', 'event_date', 'age_exact_years', 'age_years', 'age_days', 'age_range', 'level', 'appt_footprint',
]

# If considering epidemiology consistent with sim, add check here.
check_ages_of_those_HIV_inf = False
if check_ages_of_those_HIV_inf:
    for index, row in output.iterrows():
        if pd.isna(row['hv_date_inf']):
            continue  # No HIV infection date recorded on this row
        diff = relativedelta(row['hv_date_inf'], row['date_of_birth'])
        # NOTE(review): infections at up to 1 completed year of age are not reported;
        # presumably to allow for mother-to-child transmission - confirm.
        if 1 < diff.years < 15:
            print("Person contracted HIV infection at age younger than 15", diff)

# Remove events after death
filtered_data = cut_off_events_after_death(output)

print_raw_events = True  # Print raw chain of events for each individual
print_selected_changes = False  # Print only the selected tb / hiv / death transitions
print_all_changes = True  # Print every row on which any compared property changed
person_ID_of_interest = 494

# Do not truncate long DataFrames when printing
pd.set_option('display.max_rows', None)

# Report, individual by individual, the raw chain of events and the changes in
# individual properties between consecutive rows of that chain.
for name, group in filtered_data.groupby('person_ID'):
    list_of_dob = group['date_of_birth']

    # Select individuals based on when they were born
    if not list_of_dob.iloc[0].year < 2010:
        continue

    # Check that immutable properties are fixed for this individual, i.e. that events were collated properly:
    all_identical_dob = group['date_of_birth'].nunique() == 1
    all_identical_sex = group['sex'].nunique() == 1
    if not (all_identical_dob and all_identical_sex):
        print("Immutable properties are changing! This is not chain for single individual")
        print(group)
        raise SystemExit(-1)

    # Halved because, per the original note, each event is printed as a Before/After pair of rows
    number_of_events = len(group) / 2
    number_of_changes = 0

    print("----------------------------------------------------------------------")
    print("person_ID ", group['person_ID'].iloc[0], "d.o.b ", group['date_of_birth'].iloc[0])
    print("Number of events for this individual ", group['person_ID'].iloc[0], "is :", number_of_events)
    if print_raw_events:
        print(group)

    if print_all_changes:
        # Compare every row with the previous one. Missing values are replaced by a
        # sentinel first, because NaN never compares equal to NaN.
        comparable = group.drop(columns=columns_to_exclude_in_comparison)
        comparison = comparable.fillna(-99999).ne(comparable.shift().fillna(-99999))

        # Iterate over rows where any column has changed (the first row has no predecessor)
        for idx, row_changed in comparison.iloc[1:].iterrows():
            if not row_changed.any():
                continue
            number_of_changes += 1
            changed_columns = row_changed[row_changed].index.tolist()  # Columns where changes occurred
            print(f"Row {idx} - Changes detected in columns: {changed_columns}")
            columns_output = ['event', 'event_date', 'appt_footprint', 'level'] + changed_columns
            print(group.loc[idx, columns_output])  # Print only the changed columns
            if group.loc[idx, 'when'] == 'Before':
                print('-----> THIS CHANGE OCCURRED BEFORE EVENT!')
            print()  # For better readability
        print("Number of changes is ", number_of_changes, "out of ", number_of_events, " events")

    if print_selected_changes:
        # (previous value, current value) pairs that count as a transition of interest
        transitions_of_interest = {
            'tb_inf': [
                ('uninfected', 'active'),
                ('latent', 'active'),
                ('active', 'latent'),
            ],
            'hv_art': [
                ('not', 'on_not_VL_suppressed'),
                ('not', 'on_VL_suppressed'),
                ('on_VL_suppressed', 'on_not_VL_suppressed'),
                ('on_VL_suppressed', 'not'),
                ('on_not_VL_suppressed', 'on_VL_suppressed'),
                ('on_not_VL_suppressed', 'not'),
            ],
        }

        transition_condition = pd.Series(False, index=group.index)
        for column, value_pairs in transitions_of_interest.items():
            previous_values = group[column].shift(1)
            for value_before, value_after in value_pairs:
                transition_condition |= (previous_values == value_before) & (group[column] == value_after)

        # Boolean properties must be compared element-wise: `series is False` tests the
        # identity of the Series object itself and is therefore always False.
        # HIV infection acquired (False -> True)
        transition_condition |= group['hv_inf'].shift(1).eq(False) & group['hv_inf'].eq(True)
        # Death (is_alive True -> False)
        transition_condition |= group['is_alive'].shift(1).eq(True) & group['is_alive'].eq(False)

        # NOTE(review): with the born-before-2010 filter at the top of the loop this
        # branch cannot fire; confirm whether the filter or this check is intended.
        if list_of_dob.iloc[0].year >= 2010:
            print("DETECTED OF INTEREST")
            print(group[group['event'] == 'Birth'][columns_to_print])

        # Filter the DataFrame based on the condition
        filtered_transitions = group[transition_condition]
        if not filtered_transitions.empty:
            if list_of_dob.iloc[0].year < 2010:
                print("DETECTED OF INTEREST")
                print(filtered_transitions[columns_to_print])


print("Number of individuals simulated ", filtered_data.groupby('person_ID').ngroups)



119 changes: 119 additions & 0 deletions src/scripts/analysis_data_generation/scenario_generate_chains.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
"""This Scenario file run the model to generate event chans

Run on the batch system using:
```
tlo batch-submit
src/scripts/analysis_data_generation/scenario_generate_chains.py
```

or locally using:
```
tlo scenario-run src/scripts/analysis_data_generation/scenario_generate_chains.py
```

"""
from pathlib import Path
from typing import Dict

import pandas as pd

from tlo import Date, logging
from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios, get_filtered_treatment_ids
from tlo.methods.fullmodel import fullmodel
from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher
from tlo.scenario import BaseScenario
from tlo.methods import (
alri,
cardio_metabolic_disorders,
care_of_women_during_pregnancy,
contraception,
demography,
depression,
diarrhoea,
enhanced_lifestyle,
epi,
healthburden,
healthseekingbehaviour,
healthsystem,
hiv,
rti,
labour,
malaria,
newborn_outcomes,
postnatal_supervisor,
pregnancy_supervisor,
stunting,
symptommanager,
tb,
wasting,
)

class GenerateEventChains(BaseScenario):
    """Scenario running the full model with event-chain generation enabled.

    A single draw ("Baseline") is defined: the status-quo parameters with the
    HealthSystem overrides from `_baseline`, plus the `CollectEventChains`
    parameter `generate_event_chains` set to True. The simulation covers one
    month from 1 January 2010 with a population of 1000 and 3 runs per draw.
    """

    def __init__(self):
        super().__init__()
        self.seed = 42
        self.start_date = Date(2010, 1, 1)
        self.end_date = self.start_date + pd.DateOffset(months=1)
        self.pop_size = 1000
        # The draws are defined by the scenarios dict: one draw per entry
        self._scenarios = self._get_scenarios()
        self.number_of_draws = len(self._scenarios)
        self.runs_per_draw = 3
        self.generate_event_chains = True

    def log_configuration(self):
        """Return the logging set-up: output file name, directory and per-logger levels."""
        # Everything defaults to WARNING; the loggers listed below are overridden individually
        custom_levels = {
            '*': logging.WARNING,
            'tlo.methods.demography': logging.INFO,
            'tlo.methods.events': logging.INFO,
            'tlo.methods.demography.detail': logging.WARNING,
            'tlo.methods.healthburden': logging.INFO,
            'tlo.methods.healthsystem.summary': logging.INFO,
            'tlo.methods.collect_event_chains': logging.INFO
        }
        return {
            'filename': 'generate_event_chains',
            'directory': Path('./outputs'),  # <- (specified only for local running)
            'custom_levels': custom_levels,
        }

    def modules(self):
        """Return the modules to register, as provided by `fullmodel()`."""
        return fullmodel()

    def draw_parameters(self, draw_number, rng):
        """Return the parameter overrides for `draw_number`, or None if it is out of range."""
        if not draw_number < self.number_of_draws:
            return None
        scenario_definitions = list(self._scenarios.values())
        return scenario_definitions[draw_number]

    def _get_scenarios(self) -> Dict[str, Dict]:
        """Return the scenarios to run, keyed by name."""
        collect_chains = {
            "CollectEventChains": {
                "generate_event_chains": True,
            },
        }
        return {
            "Baseline": mix_scenarios(self._baseline(), collect_chains),
        }

    def _baseline(self) -> Dict:
        """Return the Dict with values for the parameter changes that define the baseline scenario."""
        health_system_overrides = {
            "HealthSystem": {
                "mode_appt_constraints": 1,  # <-- Mode 1 prior to change to preserve calibration
                "cons_availability": "all",
            }
        }
        return mix_scenarios(get_parameters_for_status_quo(), health_system_overrides)

# Entry point for running this file directly as a script: hand this file to the
# `tlo` command-line scenario runner.
if __name__ == '__main__':
    # Imported here so the CLI module is only loaded when the file is executed directly
    from tlo.cli import scenario_run

    scenario_run([__file__])
Loading
Loading