Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
fe02734
Changes to accommodate revised scenarios
sakshimohan Sep 26, 2025
ce1a216
first take - Frontier plot
sakshimohan Sep 26, 2025
1a502d0
corrections to Scenario dictionary
sakshimohan Sep 28, 2025
a05d982
improve legend positioning in Cost effectiveness plane plots
sakshimohan Sep 28, 2025
2a467dc
remove horizontal lines from ROI plots and formatting changes to CEA …
sakshimohan Sep 29, 2025
39bf753
remove horizontal lines from ROI plots and formatting changes to CEA …
sakshimohan Sep 29, 2025
72ae25b
move all the functions up
sakshimohan Oct 1, 2025
603d0be
update the order in which scenarios appear in extracted csvs
sakshimohan Oct 1, 2025
b175cb6
remove superfluous imports
sakshimohan Oct 1, 2025
766a452
new results folder
sakshimohan Nov 14, 2025
94c8ae9
Add consumables plot + change title of ROI plots
sakshimohan Nov 19, 2025
54b818a
formatting edits to consumables plot
sakshimohan Nov 20, 2025
60c6523
fix issues with negative costs/health impact when estimating ICERs an…
sakshimohan Nov 20, 2025
d9b2560
edit roi bar plots for inset figures
sakshimohan Nov 25, 2025
d232355
update scenarios and results folder for latest outputs
sakshimohan Nov 25, 2025
e5c68ab
fix treemap plot - previously summed across the three stats
sakshimohan Nov 28, 2025
4bfe316
update results extracts
sakshimohan Nov 28, 2025
09bbac6
update results folder for costing_validation
sakshimohan Nov 28, 2025
72a4f3e
add per capita estimates
sakshimohan Dec 2, 2025
7665f3a
add per capita estimates
sakshimohan Dec 2, 2025
4e36b92
change input_cost central measure to median
sakshimohan Dec 2, 2025
d205840
generate heatmap plots of consumable availability
sakshimohan Dec 2, 2025
c2d855e
- update the calculation of ICERs to be based on summary stats rather…
sakshimohan Dec 11, 2025
920d7c0
add frontier in the main CEA plot
sakshimohan Dec 11, 2025
cdf9466
update costing scripts to accept latest scenario runs.
sakshimohan Dec 17, 2025
1095943
add the estimate of HSI counts and update the statements on HIV consu…
sakshimohan Dec 18, 2025
68f106d
Add CSB++ to the list of nutritional consumables.
sakshimohan Dec 18, 2025
adb99c9
Update the cost of SAM medicines and scale down the cost of F-75 ther…
sakshimohan Dec 18, 2025
cc360f1
Update cost of F-75 therapeutic milk for validation purposes
sakshimohan Dec 18, 2025
b1beb4d
clean equipment and consumable names and drop irrelevant ones
sakshimohan Dec 18, 2025
00eb25f
add extracts for manuscript on nutrition commodities
sakshimohan Dec 19, 2025
e9680da
fix equipment name cleaning
sakshimohan Dec 19, 2025
f34680e
update the cost of SAM medicines to reflect 15% complicated cases as …
sakshimohan Dec 19, 2025
3cbe24a
update figure format
sakshimohan Dec 19, 2025
9c528e8
add scenario file
sakshimohan Dec 19, 2025
693fb2f
Update format of figures to meet journal requirements
sakshimohan Jan 16, 2026
87c9c16
fix imports
tbhallett Jan 19, 2026
561ddef
linting
tbhallett Jan 19, 2026
38eb261
Merge branch 'master' into sakshi/comparison_of_horizontal_v_vertical_v2
tbhallett Jan 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions resources/costing/ResourceFile_Costing_Consumables.csv
Git LFS file not shown

Large diffs are not rendered by default.

162 changes: 154 additions & 8 deletions src/scripts/costing/cost_estimation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import ast
import itertools
import math
import re
import textwrap
from collections import defaultdict
from itertools import cycle
Expand Down Expand Up @@ -167,6 +168,117 @@ def get_discount_factor(year):

return _df

# Clean the names of consumables in input cost dataframe
def clean_consumable_name(name: str) -> str:
    """
    Clean a consumable name for analysis and plotting.

    The cleaning steps are applied in this order:

    1. Remove procurement suffixes (``_CMST``, ``_IDA``, ``_each_CMST``,
       ``_each``, ``_ID``, ``_PFR``, ``_nt``; case-insensitive). A run of
       consecutive suffixes such as ``_PFR_each_CMST`` is removed as a whole.
    2. Remove a trailing numeric package indicator (e.g. ``_100``) and
       quantity phrases such as ``100 tests`` or ``10 packs``.
    3. Remove/replace awkward characters (``Â`` is dropped, ``½`` -> ``1/2``).
    4. Collapse runs of whitespace and strip the ends.
    5. Harmonise common spelling variants (e.g. Amoxycillin -> Amoxicillin).
    6. Rename selected items to a canonical label when the whole cleaned
       name matches (case-insensitive).
    7. Upper-case the first character only, leaving the rest untouched.

    Parameters
    ----------
    name : str
        Raw consumable name. Non-string values are returned unchanged.

    Returns
    -------
    str
        The cleaned name (or the original object if it is not a string).
    """
    if not isinstance(name, str):
        return name

    cleaned = name

    # --- 1. Remove common procurement suffixes ---
    # The group is repeated (`+`) so that chained suffixes are removed in a
    # single match. Matching one suffix at a time and consuming the following
    # underscore would strip the delimiter of the next suffix and leave part of
    # it behind (e.g. 'X_PFR_each_CMST' -> 'Xeach').
    # 'Each_CMST' also covers 'each_CMST' because matching is case-insensitive.
    cleaned = re.sub(
        r'(?:_(?:CMST|IDA|Each_CMST|each|ID|PFR|nt))+(\b|_)',
        '',
        cleaned,
        flags=re.IGNORECASE
    )

    # --- 2. Remove trailing numeric package indicators ---
    cleaned = re.sub(r'_\d+(\.\d+)?$', '', cleaned)
    cleaned = re.sub(
        r'\b\d+\s*(tests|pieces|doses|pack|packs|box|boxes)\b',
        '',
        cleaned,
        flags=re.IGNORECASE
    )

    # --- 3. Remove awkward characters ---
    cleaned = cleaned.replace('Â', '')
    cleaned = cleaned.replace('½', '1/2')

    # --- 4. Normalise whitespace ---
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()

    # --- 5. Harmonise common spelling variants ---
    harmonisation = {
        'Amoxycillin': 'Amoxicillin',
        'Gentamycin': 'Gentamicin',
        'Declofenac': 'Diclofenac',
        'Frusemide': 'Furosemide',
        'Cotrimoxizole': 'Cotrimoxazole',
        "ringer's lactate": "Ringer's lactate",
    }

    for old, new in harmonisation.items():
        # Escape the key so it is always matched literally, even if an entry
        # containing regex metacharacters is added to the mapping later.
        cleaned = re.sub(rf'\b{re.escape(old)}\b', new, cleaned, flags=re.IGNORECASE)

    # --- 6. Canonical renaming for key nutrition / diagnostics items ---
    canonical_map = {
        'Therapeutic spread, sachet 92g/CAR-150':
            'Ready-to-use therapeutic food (RUTF)',
        'Therapeutic spread, sachet 92g / CAR-150':
            'Ready-to-use therapeutic food (RUTF)',
        'VL test':
            'Viral load test',
        'Dietary supplements (country-specific)':
            'Multiple micronutrient powder (MNP) supplement'
    }

    # Apply canonical renaming (case-insensitive exact match)
    cleaned_lower = cleaned.lower()
    for old, new in canonical_map.items():
        if cleaned_lower == old.lower():
            cleaned = new
            break

    # --- 7. Capitalise first letter only (preserve acronyms elsewhere) ---
    cleaned = re.sub(r'^.', lambda m: m.group(0).upper(), cleaned)

    return cleaned

# Clean the names of equipment in the cost dataframe (the drop-list argument is accepted but not applied inside this function)
def clean_equipment_name(name: str, equipment_drop_list = None) -> str:
    """
    Standardise a medical equipment name for analysis.

    Only light normalisation is performed: known encoding artefacts are
    repaired, slashes and whitespace are normalised, a small set of names is
    renamed explicitly, and the first character is upper-cased.

    Parameters
    ----------
    name : str
        Raw equipment name. Non-string values are returned unchanged.
    equipment_drop_list : optional
        Accepted for interface compatibility; it is not read anywhere in this
        function body.

    Returns
    -------
    str
        The cleaned name (or the original object if it is not a string).
    """
    if not isinstance(name, str):
        return name

    # --- 1. Fix known encoding artefacts (applied in the listed order) ---
    artefact_fixes = (
        ('â\x80\x99', '’'),
        ('Â', ''),
    )
    text = name
    for broken, fixed in artefact_fixes:
        text = text.replace(broken, fixed)

    # --- 2. Normalise slashes and whitespace ---
    text = re.sub(r'\s*/\s*', ' / ', text)
    text = re.sub(r'\s+', ' ', text).strip()

    # --- 3. Explicit canonical renaming (keep minimal) ---
    # NOTE(review): the last key still contains the un-repaired artefact and an
    # un-normalised slash, so after steps 1-2 it can no longer be matched; the
    # repaired form is covered by the entry above it.
    canonical_names = {
        'Image view station, for conferences':
            'Clinical image viewing workstation (PACS / case review)',
        'Cusco’s / bivalved Speculum (small, medium, large)':
            'Cusco’s / bivalved speculum (small, medium, large)',
        'Cuscoâ\x80\x99s/ bivalved Speculum (small, medium, large)':
            'Cusco’s / bivalved speculum (small, medium, large)',
    }

    # Case-insensitive whole-name match; the first matching entry wins and the
    # name is left as-is when nothing matches.
    lowered = text.lower()
    text = next(
        (target for source, target in canonical_names.items() if source.lower() == lowered),
        text,
    )

    # --- 4. Capitalise first letter only (preserve acronyms) ---
    # The text cannot start with a newline at this point (it has been stripped
    # or replaced by a fixed label), so slicing is equivalent to substituting
    # on r'^.'.
    return text[:1].upper() + text[1:]


def estimate_input_cost_of_scenarios(results_folder: Path,
resourcefilepath: Path,
Expand Down Expand Up @@ -1218,7 +1330,11 @@ def do_stacked_bar_plot_of_cost_by_category(_df: pd.DataFrame,
_scenario_dict: Optional[dict[int, str]] = None,
show_title: bool = True,
_outputfilepath: Optional[Path] = None,
_add_figname_suffix: str = ''):
_add_figname_suffix: str = '',
_label_fontsize: float = 9.0,
_tick_fontsize: float = 10.0,
_legend_label_map: Optional[dict[str, str]] = None
):
"""
Create and save a stacked bar chart of costs by category, subcategory or subgroup.

Expand Down Expand Up @@ -1257,6 +1373,14 @@ def do_stacked_bar_plot_of_cost_by_category(_df: pd.DataFrame,
_add_figname_suffix : str, default ''
Optional string to append to the saved figure's filename

_label_fontsize : float, optional
fontsize of data labels

_tick_fontsize: float, optional
font size of axis ticks

_legend_label_map: dict, optional
Dictionary providing clean category names for publishable legends
Returns:
-------
None
Expand Down Expand Up @@ -1395,10 +1519,11 @@ def do_stacked_bar_plot_of_cost_by_category(_df: pd.DataFrame,
xy=(x, rect.get_y() + height), # Arrow start
xytext=(x + 0.3, rect.get_y() + height + threshold), # Offset text
arrowprops=dict(arrowstyle="->", color='black', lw=0.8),
fontsize='small', ha='left', va='center', color='black'
fontsize=_label_fontsize, ha='left', va='center', color='black', fontweight='bold',
)
else: # Large segment -> label inside
ax.text(x, y, f'{round(height, 1)}', ha='center', va='center', fontsize='small', color='white')
ax.text(x, y, f'{round(height, 1)}', ha='center', va='center', fontsize=_label_fontsize,
fontweight='bold', color='white')

# Set custom x-tick labels if _scenario_dict is provided
if _scenario_dict:
Expand All @@ -1408,7 +1533,7 @@ def do_stacked_bar_plot_of_cost_by_category(_df: pd.DataFrame,

# Wrap x-tick labels for readability
wrapped_labels = [textwrap.fill(str(label), 20) for label in labels]
ax.set_xticklabels(wrapped_labels, rotation=45, ha='right', fontsize='small')
ax.set_xticklabels(wrapped_labels, rotation=45, ha='right', fontsize=_tick_fontsize)

# Period included for plot title and name
if _year == 'all':
Expand All @@ -1419,16 +1544,29 @@ def do_stacked_bar_plot_of_cost_by_category(_df: pd.DataFrame,
period = (f"{min(_year)} - {max(_year)}")

# Save plot
plt.xlabel('Scenario')
plt.ylabel('Cost (2023 USD), millions')
plt.xlabel('Scenario', fontsize = _tick_fontsize, fontweight = 'bold')
plt.ylabel('Cost (2023 USD), millions', fontsize = _tick_fontsize, fontweight = 'bold')

# Arrange the legend in the same ascending order
handles, labels = plt.gca().get_legend_handles_labels()
plt.legend(handles[::-1], labels[::-1], bbox_to_anchor=(1.05, 0.7), loc='center left', fontsize='small')
if _legend_label_map is not None:
labels = [
_legend_label_map.get(label, label)
for label in labels
]

plt.legend(
handles[::-1],
labels[::-1],
bbox_to_anchor=(1.05, 0.7),
loc='center left',
fontsize=_tick_fontsize
)

# Extend the y-axis by 25%
max_y = ax.get_ylim()[1]
ax.set_ylim(0, max_y * 1.25)
ax.tick_params(axis='y', labelsize=_tick_fontsize)

# Save the plot with tight layout
plt.tight_layout(pad=2.0) # Ensure there is enough space for the legend
Expand Down Expand Up @@ -1712,9 +1850,17 @@ def wrap_text(text, width=15):
if _draw is not None:
_df = _df[_df.draw == _draw]

if _year != 'all':
_df = _df[_df['year'].isin(_year)]

if 'mean' in _df.stat.unique():
_df = _df[_df['stat'] == 'mean']
else:
_df = _df[_df['stat'] == 'median']

# Remove non-specific subgroup for consumables
if _cost_category == 'medical consumables':
_df = _df[~(_df.cost_subgroup == 'supply chain (all consumables)')]
_df = _df[~(_df.cost_subgroup.str.contains('all consumables'))] # These are supply chain costs

# Create summary dataframe for treemap
_df = _df.groupby('cost_subgroup')['cost'].sum().reset_index()
Expand Down
Loading