diff --git a/hbt/util.py b/hbt/util.py index b3f43a7..6f1c013 100644 --- a/hbt/util.py +++ b/hbt/util.py @@ -11,6 +11,8 @@ from columnflow.types import Any from columnflow.columnar_util import ArrayFunction, deferred_column +import numpy as np + @deferred_column def IF_NANO_V9(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]: @@ -59,17 +61,28 @@ def IF_DATASET_IS_DY( return self.get() if func.dataset_inst.has_tag("is_dy") else None -def hash_events(arr): +def hash_events(arr: np.ndarray) -> np.ndarray: """ Helper function to create a hash value from the event, run and luminosityBlock columns. The values are padded to specific lengths and concatenated to a single integer. """ - # TODO what is a good value here? - max_digits_run = 4 - max_digits_luminosityBlock = 3 + max_digits_run + def assert_value(arr: np.ndarray, field: str, max_value: int) -> None: + """ + Helper function to check if a column does not exceed a maximum value. + """ + digits = len(str(arr[field].to_numpy().max())) + assert digits <= max_value, f"{field} digit count is {digits} and exceed max value {max_value}" + + max_digits_run = 6 + max_digits_luminosityBlock = 5 + max_digits_event = 7 + + assert_value(arr, "run", max_digits_run) + assert_value(arr, "luminosityBlock", max_digits_luminosityBlock) + assert_value(arr, "event", max_digits_event) hash_value = ( - arr.event * 10**max_digits_luminosityBlock + + arr.event * 10**(max_digits_luminosityBlock + max_digits_run) + arr.luminosityBlock * 10**max_digits_run + arr.run )