Skip to content

Commit

Permalink
add an assertion to check if a unique identifier column exceeds a specifi…
Browse files Browse the repository at this point in the history
…c value, which is given by data (and may be exceeded in the far future)
  • Loading branch information
Bogdan-Wiederspan committed Nov 7, 2024
1 parent 4beb2ec commit 81d6ef7
Showing 1 changed file with 18 additions and 5 deletions.
23 changes: 18 additions & 5 deletions hbt/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from columnflow.types import Any
from columnflow.columnar_util import ArrayFunction, deferred_column

import numpy as np


@deferred_column
def IF_NANO_V9(self: ArrayFunction.DeferredColumn, func: ArrayFunction) -> Any | set[Any]:
Expand Down Expand Up @@ -59,17 +61,28 @@ def IF_DATASET_IS_DY(
return self.get() if func.dataset_inst.has_tag("is_dy") else None


def hash_events(arr):
def hash_events(arr: np.ndarray) -> np.ndarray:
"""
Helper function to create a hash value from the event, run and luminosityBlock columns.
The values are padded to specific lengths and concatenated to a single integer.
"""
# TODO what is a good value here?
max_digits_run = 4
max_digits_luminosityBlock = 3 + max_digits_run
def assert_value(arr: np.ndarray, field: str, max_value: int) -> None:
    """
    Helper function to check that the largest entry of a column does not need more digits than
    allowed.

    :param arr: Array-like object exposing the column via ``arr[field]``; the column must provide
        a ``to_numpy()`` method.
    :param field: Name of the column to check.
    :param max_value: Maximum allowed number of characters in the string representation of the
        column's maximum, i.e. a digit count and not a numeric upper bound.
    :raises AssertionError: If the column's maximum needs more than *max_value* digits.
    """
    values = arr[field].to_numpy()

    # an empty column has no maximum (numpy raises on zero-size reductions) and cannot exceed
    # the limit, so there is nothing to check
    if values.size == 0:
        return

    digits = len(str(values.max()))

    # raise explicitly rather than via an ``assert`` statement so that the check is not stripped
    # when python runs with -O; the exception type and message stay the same
    if digits > max_value:
        raise AssertionError(f"{field} digit count is {digits} and exceed max value {max_value}")

max_digits_run = 6
max_digits_luminosityBlock = 5
max_digits_event = 7

assert_value(arr, "run", max_digits_run)
assert_value(arr, "luminosityBlock", max_digits_luminosityBlock)
assert_value(arr, "event", max_digits_event)

hash_value = (
arr.event * 10**max_digits_luminosityBlock +
arr.event * 10**(max_digits_luminosityBlock + max_digits_run) +
arr.luminosityBlock * 10**max_digits_run +
arr.run
)
Expand Down

0 comments on commit 81d6ef7

Please sign in to comment.