Skip to content

Commit 683a219

Browse files
Merge pull request #159 from AutomatedProcessImprovement/157-report-runtime
#157 - Report runtime
2 parents a75c534 + 678364d commit 683a219

File tree

6 files changed

+105
-20
lines changed

6 files changed

+105
-20
lines changed

.github/workflows/simod.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ jobs:
5858
run: poetry run pylint -j 0 --exit-zero src/simod > pylint.txt
5959

6060
- name: Upload PyLint output
61-
uses: actions/upload-artifact@v3
61+
uses: actions/upload-artifact@v4
6262
with:
6363
name: pylint.txt
6464
path: ./pylint.txt
@@ -145,7 +145,7 @@ jobs:
145145
poetry run pip-licenses --with-system --with-urls --format=markdown --output-file=licenses.md
146146
147147
- name: Upload licenses.md
148-
uses: actions/upload-artifact@v3
148+
uses: actions/upload-artifact@v4
149149
with:
150150
name: licenses.md
151151
path: licenses.md

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
66
name = "simod"
7-
version = "5.0.1"
7+
version = "5.0.2"
88
authors = [
99
"Ihar Suvorau <[email protected]>",
1010
"David Chapela <[email protected]>",

src/simod/metrics.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from log_distance_measures.circadian_event_distribution import (
1010
circadian_event_distribution_distance,
1111
)
12+
from log_distance_measures.circadian_workforce_distribution import circadian_workforce_distribution_distance
1213
from log_distance_measures.config import AbsoluteTimestampType
1314
from log_distance_measures.control_flow_log_distance import control_flow_log_distance
1415
from log_distance_measures.cycle_time_distribution import (
@@ -47,6 +48,8 @@ def compute_metric(
4748
result = get_n_grams_distribution_distance(original_log, original_log_ids, simulated_log, simulated_log_ids, 3)
4849
elif metric is Metric.CIRCADIAN_EMD:
4950
result = get_circadian_emd(original_log, original_log_ids, simulated_log, simulated_log_ids)
51+
elif metric is Metric.CIRCADIAN_WORKFORCE_EMD:
52+
result = get_circadian_workforce_emd(original_log, original_log_ids, simulated_log, simulated_log_ids)
5053
elif metric is Metric.ARRIVAL_EMD:
5154
result = get_arrival_emd(original_log, original_log_ids, simulated_log, simulated_log_ids)
5255
elif metric is Metric.RELATIVE_EMD:
@@ -122,6 +125,25 @@ def get_circadian_emd(
122125
return emd
123126

124127

128+
def get_circadian_workforce_emd(
    original_log: pd.DataFrame,
    original_log_ids: EventLogIDs,
    simulated_log: pd.DataFrame,
    simulated_log_ids: EventLogIDs,
) -> float:
    """
    Circadian workforce distribution distance between the original and the simulated event log.

    The measure compares the histograms of active resources of both logs, i.e., the average number of
    active resources observed for each weekday/hour slot (e.g., Monday 10am).
    """
    # Thin wrapper: the computation is delegated entirely to log_distance_measures.
    return circadian_workforce_distribution_distance(
        original_log,
        original_log_ids,
        simulated_log,
        simulated_log_ids,
    )
145+
146+
125147
def get_arrival_emd(
126148
original_log: pd.DataFrame,
127149
original_log_ids: EventLogIDs,

src/simod/runtime_meter.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import json
2+
import timeit
3+
4+
5+
class RuntimeMeter:
    """
    Stopwatch for named pipeline stages.

    Call ``start(stage)`` before a stage and ``stop(stage)`` after it; the elapsed time
    (difference of two ``timeit.default_timer()`` readings) is stored in ``runtimes`` under
    the stage name. The class-level string constants are the stage names used as keys.
    """

    # Timer reading taken by start(), keyed by stage name.
    runtime_start: dict
    # Timer reading taken by stop(), keyed by stage name.
    runtime_stop: dict
    # Elapsed time (stop reading - start reading), keyed by stage name.
    runtimes: dict

    TOTAL: str = "SIMOD_TOTAL_RUNTIME"
    INITIAL_MODEL: str = "discover-initial-BPS-model"
    CONTROL_FLOW_MODEL: str = "optimize-control-flow-model"
    RESOURCE_MODEL: str = "optimize-resource-model"
    DATA_ATTRIBUTES_MODEL: str = "discover-data-attributes"
    EXTRANEOUS_DELAYS: str = "discover-extraneous-delays"
    FINAL_MODEL: str = "discover-final-BPS-model"
    EVALUATION: str = "evaluate-final-BPS-model"

    def __init__(self):
        self.runtime_start = {}
        self.runtime_stop = {}
        self.runtimes = {}

    def start(self, stage_name: str):
        """Record the start of ``stage_name``; starting the same stage again overwrites the previous reading."""
        self.runtime_start[stage_name] = timeit.default_timer()

    def stop(self, stage_name: str):
        """
        Record the end of ``stage_name`` and store its elapsed time in ``runtimes``.

        :raises KeyError: if ``stage_name`` was never started. No state is modified in that case.
        """
        # Read the clock first so the bookkeeping below is not counted as stage time.
        stopped_at = timeit.default_timer()
        try:
            started_at = self.runtime_start[stage_name]
        except KeyError:
            # Fail before touching runtime_stop so a bad call leaves no dangling entry behind.
            raise KeyError(f"Runtime stage '{stage_name}' was stopped without being started") from None
        self.runtime_stop[stage_name] = stopped_at
        self.runtimes[stage_name] = stopped_at - started_at

    def to_json(self) -> str:
        """Return the recorded runtimes (stage name -> elapsed time) serialized as a JSON object."""
        return json.dumps(self.runtimes)

src/simod/settings/common_settings.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ class Metric(str, Enum):
1818
TWO_GRAM_DISTANCE = "two_gram_distance"
1919
THREE_GRAM_DISTANCE = "three_gram_distance"
2020
CIRCADIAN_EMD = "circadian_event_distribution"
21+
CIRCADIAN_WORKFORCE_EMD = "circadian_workforce_distribution"
2122
ARRIVAL_EMD = "arrival_event_distribution"
2223
RELATIVE_EMD = "relative_event_distribution"
2324
ABSOLUTE_EMD = "absolute_event_distribution"
@@ -40,6 +41,8 @@ def _from_str(cls, value: str) -> "Metric":
4041
return cls.THREE_GRAM_DISTANCE
4142
elif value.lower() in ["circadian_event_distribution", "circadian_emd"]:
4243
return cls.CIRCADIAN_EMD
44+
elif value.lower() in ["circadian_workforce_distribution", "workforce_emd", "workforce_distribution"]:
45+
return cls.CIRCADIAN_WORKFORCE_EMD
4346
elif value.lower() in ["arrival_event_distribution", "arrival_emd"]:
4447
return cls.ARRIVAL_EMD
4548
elif value.lower() in ["relative_event_distribution", "relative_emd"]:
@@ -66,6 +69,8 @@ def __str__(self):
6669
return "THREE_GRAM_DISTANCE"
6770
elif self == Metric.CIRCADIAN_EMD:
6871
return "CIRCADIAN_EVENT_DISTRIBUTION"
72+
elif self == Metric.CIRCADIAN_WORKFORCE_EMD:
73+
return "CIRCADIAN_WORKFORCE_DISTRIBUTION"
6974
elif self == Metric.ARRIVAL_EMD:
7075
return "ARRIVAL_EVENT_DISTRIBUTION"
7176
elif self == Metric.RELATIVE_EMD:
@@ -140,6 +145,7 @@ def from_dict(config: dict, config_dir: Optional[Path] = None) -> "CommonSetting
140145
Metric.TWO_GRAM_DISTANCE,
141146
Metric.THREE_GRAM_DISTANCE,
142147
Metric.CIRCADIAN_EMD,
148+
Metric.CIRCADIAN_WORKFORCE_EMD,
143149
Metric.ARRIVAL_EMD,
144150
Metric.RELATIVE_EMD,
145151
Metric.ABSOLUTE_EMD,

src/simod/simod.py

Lines changed: 41 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,12 @@
1515
from pix_framework.io.bpmn import get_activities_names_from_bpmn
1616

1717
from simod.batching.discovery import discover_batching_rules
18-
from simod.data_attributes.discovery import discover_data_attributes
18+
from simod.branch_rules.discovery import discover_branch_rules, map_branch_rules_to_flows
1919
from simod.cli_formatter import print_section, print_subsection
2020
from simod.control_flow.discovery import discover_process_model, add_bpmn_diagram_to_model
2121
from simod.control_flow.optimizer import ControlFlowOptimizer
2222
from simod.control_flow.settings import HyperoptIterationParams as ControlFlowHyperoptIterationParams
23+
from simod.data_attributes.discovery import discover_data_attributes
2324
from simod.event_log.event_log import EventLog
2425
from simod.extraneous_delays.optimizer import ExtraneousDelaysOptimizer
2526
from simod.extraneous_delays.types import ExtraneousDelay
@@ -28,11 +29,11 @@
2829
from simod.resource_model.optimizer import ResourceModelOptimizer
2930
from simod.resource_model.repair import repair_with_missing_activities
3031
from simod.resource_model.settings import HyperoptIterationParams as ResourceModelHyperoptIterationParams
32+
from simod.runtime_meter import RuntimeMeter
3133
from simod.settings.simod_settings import SimodSettings
3234
from simod.simulation.parameters.BPS_model import BPSModel
3335
from simod.simulation.prosimos import simulate_and_evaluate
3436
from simod.utilities import get_process_model_path, get_simulation_parameters_path
35-
from simod.branch_rules.discovery import discover_branch_rules, map_branch_rules_to_flows
3637

3738

3839
class Simod:
@@ -87,6 +88,10 @@ def run(self):
8788
Optimizes the BPS model with the given event log and settings.
8889
"""
8990

91+
# Runtime object
92+
runtimes = RuntimeMeter()
93+
runtimes.start(RuntimeMeter.TOTAL)
94+
9095
# Model activities might be different from event log activities if the model has been provided,
9196
# because we split the event log into train, test, and validation partitions.
9297
# We use model_activities to repair resource_model later after its discovery from a reduced event log.
@@ -96,6 +101,7 @@ def run(self):
96101

97102
# --- Discover Default Case Arrival and Resource Allocation models --- #
98103
print_section("Discovering initial BPS Model")
104+
runtimes.start(RuntimeMeter.INITIAL_MODEL)
99105
self._best_bps_model.case_arrival_model = discover_case_arrival_model(
100106
self._event_log.train_validation_partition, # No optimization process here, use train + validation
101107
self._event_log.log_ids,
@@ -115,43 +121,53 @@ def run(self):
115121
event_log=self._event_log.train_validation_partition,
116122
log_ids=self._event_log.log_ids,
117123
)
124+
runtimes.stop(RuntimeMeter.INITIAL_MODEL)
118125

119126
# --- Control-Flow Optimization --- #
120127
print_section("Optimizing control-flow parameters")
128+
runtimes.start(RuntimeMeter.CONTROL_FLOW_MODEL)
121129
best_control_flow_params = self._optimize_control_flow()
122130
self._best_bps_model.process_model = self._control_flow_optimizer.best_bps_model.process_model
123131
self._best_bps_model.gateway_probabilities = self._control_flow_optimizer.best_bps_model.gateway_probabilities
124132
self._best_bps_model.branch_rules = self._control_flow_optimizer.best_bps_model.branch_rules
133+
runtimes.stop(RuntimeMeter.CONTROL_FLOW_MODEL)
125134

126135
# --- Data Attributes --- #
127136
if (self._settings.common.discover_data_attributes or
128137
self._settings.resource_model.discover_prioritization_rules):
129138
print_section("Discovering data attributes")
139+
runtimes.start(RuntimeMeter.DATA_ATTRIBUTES_MODEL)
130140
global_attributes, case_attributes, event_attributes = discover_data_attributes(
131141
self._event_log.train_validation_partition,
132142
self._event_log.log_ids,
133143
)
134144
self._best_bps_model.global_attributes = global_attributes
135145
self._best_bps_model.case_attributes = case_attributes
136146
self._best_bps_model.event_attributes = event_attributes
147+
runtimes.stop(RuntimeMeter.DATA_ATTRIBUTES_MODEL)
137148

138149
# --- Resource Model Discovery --- #
139150
print_section("Optimizing resource model parameters")
151+
runtimes.start(RuntimeMeter.RESOURCE_MODEL)
140152
best_resource_model_params = self._optimize_resource_model(model_activities)
141153
self._best_bps_model.resource_model = self._resource_model_optimizer.best_bps_model.resource_model
142154
self._best_bps_model.calendar_granularity = self._resource_model_optimizer.best_bps_model.calendar_granularity
143155
self._best_bps_model.prioritization_rules = self._resource_model_optimizer.best_bps_model.prioritization_rules
144156
self._best_bps_model.batching_rules = self._resource_model_optimizer.best_bps_model.batching_rules
157+
runtimes.stop(RuntimeMeter.RESOURCE_MODEL)
145158

146159
# --- Extraneous Delays Discovery --- #
147160
if self._settings.extraneous_activity_delays is not None:
148161
print_section("Discovering extraneous delays")
162+
runtimes.start(RuntimeMeter.EXTRANEOUS_DELAYS)
149163
timers = self._optimize_extraneous_activity_delays()
150164
self._best_bps_model.extraneous_delays = timers
151165
add_timers_to_bpmn_model(self._best_bps_model.process_model, timers) # Update BPMN model on disk
166+
runtimes.stop(RuntimeMeter.EXTRANEOUS_DELAYS)
152167

153168
# --- Discover final BPS model --- #
154169
print_section("Discovering final BPS model")
170+
runtimes.start(RuntimeMeter.FINAL_MODEL)
155171
self.final_bps_model = BPSModel( # Bypass all models already discovered with train+validation
156172
process_model=get_process_model_path(self._best_result_dir, self._event_log.process_name),
157173
case_arrival_model=self._best_bps_model.case_arrival_model,
@@ -187,19 +203,17 @@ def run(self):
187203
bpmn_graph=best_bpmn_graph,
188204
discovery_method=best_control_flow_params.gateway_probabilities_method,
189205
)
190-
191206
# Branch Rules
192207
if self._settings.control_flow.discover_branch_rules:
193208
print_section("Discovering branch conditions")
194209
self.final_bps_model.branch_rules = discover_branch_rules(
195-
best_bpmn_graph,
196-
self._event_log.train_validation_partition,
197-
self._event_log.log_ids,
198-
f_score=best_control_flow_params.f_score
199-
)
210+
best_bpmn_graph,
211+
self._event_log.train_validation_partition,
212+
self._event_log.log_ids,
213+
f_score=best_control_flow_params.f_score
214+
)
200215
self.final_bps_model.gateway_probabilities = \
201216
map_branch_rules_to_flows(self.final_bps_model.gateway_probabilities, self.final_bps_model.branch_rules)
202-
203217
# Resource model
204218
print_subsection("Discovering best resource model")
205219
self.final_bps_model.resource_model = discover_resource_model(
@@ -235,6 +249,9 @@ def run(self):
235249
self.final_bps_model.extraneous_delays = self._best_bps_model.extraneous_delays
236250
add_timers_to_bpmn_model(self.final_bps_model.process_model, self._best_bps_model.extraneous_delays)
237251
self.final_bps_model.replace_activity_names_with_ids()
252+
runtimes.stop(RuntimeMeter.FINAL_MODEL)
253+
runtimes.stop(RuntimeMeter.TOTAL)
254+
238255
# Write JSON parameters to file
239256
json_parameters_path = get_simulation_parameters_path(self._best_result_dir, self._event_log.process_name)
240257
with json_parameters_path.open("w") as f:
@@ -243,14 +260,18 @@ def run(self):
243260
# --- Evaluate final BPS model --- #
244261
if self._settings.common.perform_final_evaluation:
245262
print_subsection("Evaluate")
263+
runtimes.start(RuntimeMeter.EVALUATION)
246264
simulation_dir = self._best_result_dir / "evaluation"
247265
simulation_dir.mkdir(parents=True, exist_ok=True)
248266
self._evaluate_model(self.final_bps_model.process_model, json_parameters_path, simulation_dir)
267+
runtimes.stop(RuntimeMeter.EVALUATION)
249268

250269
# --- Export settings and clean temporal files --- #
270+
print_section(f"Exporting canonical model, runtimes, settings and cleaning up intermediate files")
251271
canonical_model_path = self._best_result_dir / "canonical_model.json"
252-
print_section(f"Exporting canonical model to {canonical_model_path}")
253272
_export_canonical_model(canonical_model_path, best_control_flow_params, best_resource_model_params)
273+
runtimes_model_path = self._best_result_dir / "runtimes.json"
274+
_export_runtimes(runtimes_model_path, runtimes)
254275
if self._settings.common.clean_intermediate_files:
255276
self._clean_up()
256277
self._settings.to_yaml(self._best_result_dir)
@@ -342,14 +363,17 @@ def _export_canonical_model(
342363
control_flow_settings: ControlFlowHyperoptIterationParams,
343364
calendar_settings: ResourceModelHyperoptIterationParams,
344365
):
345-
structure = control_flow_settings.to_dict()
346-
347-
calendars = calendar_settings.to_dict()
348-
349366
canon = {
350-
"control_flow": structure,
351-
"calendars": calendars,
367+
"control_flow": control_flow_settings.to_dict(),
368+
"calendars": calendar_settings.to_dict(),
352369
}
353-
354370
with open(file_path, "w") as f:
355371
json.dump(canon, f)
372+
373+
374+
def _export_runtimes(file_path: Path, runtimes: RuntimeMeter):
    """Write the per-stage runtimes held in ``runtimes.runtimes`` to ``file_path`` as JSON."""
    recorded = runtimes.runtimes
    with open(file_path, "w") as output_file:
        json.dump(recorded, output_file)

0 commit comments

Comments
 (0)