Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Store event times using CTAO high resolution timestamps #2707

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions docs/api-reference/time/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
.. _time:

**********************
time (`~ctapipe.time`)
**********************

.. currentmodule:: ctapipe.time


Reference/API
=============

.. automodapi:: ctapipe.time
:no-inheritance-diagram:
9 changes: 9 additions & 0 deletions docs/changes/2707.datamodel.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Change format in which event timestamps are written to HDF5 files.
Instead of a single float64 MJD value which has ~µs precision,
the CTAO high precision time format is used now.
This stores the timestamp as two uint32 values: seconds
and quarter nanoseconds since ``1970-01-01T00:00:00.0 TAI``.

This only affects the storage format and the precision,
the in-memory API is unchanged as it relies on ``astropy.time.Time``
and values are converted when reading/writing from/to HDF5.
2 changes: 1 addition & 1 deletion src/ctapipe/instrument/optics.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ class ComaPSFModel(PSFModel):
help=(
"Describes the dependency of the polar scale on the "
"distance to the center of the camera. Used to calculate "
"the width :math:`S_\phi` of the polar Laplacian in the PSF "
r"the width :math:`S_\phi` of the polar Laplacian in the PSF "
"as a function of the distance r to the optical axis"
)
).tag(config=True)
Expand Down
6 changes: 5 additions & 1 deletion src/ctapipe/io/astropy_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ def write_table(
append=False,
overwrite=False,
mode="a",
time_format="ctao_high_res",
filters=DEFAULT_FILTERS,
):
"""Write a table to an HDF5 file
Expand All @@ -145,6 +146,9 @@ def write_table(
mode: str
If given a path for ``h5file``, it will be opened in this mode.
See the docs of ``tables.open_file``.
time_format: str
Format to use for storing time columns.
Either 'ctao_high_res' (the default) or a format supported by `astropy.time.Time`.
"""
copied = False
parent, table_name = os.path.split(path)
Expand Down Expand Up @@ -173,7 +177,7 @@ def write_table(
attrs[f"CTAFIELD_{pos}_DESC"] = column.description

if isinstance(column, Time):
transform = TimeColumnTransform(scale="tai", format="mjd")
transform = TimeColumnTransform(scale="tai", format=time_format)
attrs.update(transform.get_meta(pos))

if copied is False:
Expand Down
12 changes: 9 additions & 3 deletions src/ctapipe/io/datawriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,10 @@ def _get_tel_index(event, tel_id):
# (meaning readers need to update scripts)
# - increase the minor number if new columns or datasets are added
# - increase the patch number if there is a small bugfix to the model.
DATA_MODEL_VERSION = "v6.0.0"
DATA_MODEL_VERSION = "v7.0.0"
DATA_MODEL_CHANGE_HISTORY = """
- v7.0.0: - Use high resolution timestamps for times. CTAO high resolution times
are stored as two uint32: seconds and quarter nanoseconds since 1970-01-01T00:00:00 TAI.
- v6.0.0: - Change R1- and DL0-waveform shape from (n_pixels, n_samples) to be always
(n_channels, n_pixels, n_samples).
- v5.1.0: - Remove redundant 'is_valid' column in ``DispContainer``.
Expand Down Expand Up @@ -534,10 +536,14 @@ def _write_scheduling_and_observation_blocks(self):
)

for sb in self.event_source.scheduling_blocks.values():
self._writer.write("configuration/observation/scheduling_block", sb)
self._writer.write(
"configuration/observation/scheduling_block", sb, time_format="mjd"
)

for ob in self.event_source.observation_blocks.values():
self._writer.write("configuration/observation/observation_block", ob)
self._writer.write(
"configuration/observation/observation_block", ob, time_format="mjd"
)

def _write_simulation_configuration(self):
"""
Expand Down
26 changes: 18 additions & 8 deletions src/ctapipe/io/hdf5eventsource.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
"v5.0.0",
"v5.1.0",
"v6.0.0",
"v7.0.0",
]


Expand Down Expand Up @@ -314,15 +315,24 @@ def is_compatible(file_path):

# we can now read both R1 and DL1
has_muons = "/dl1/event/telescope/muon" in f.root
has_sim = "/simulation/event/telescope" in f.root
has_trigger = "/simulation/event/telescope" in f.root

datalevels = set(metadata["CTA PRODUCT DATA LEVELS"].split(","))
datalevels = datalevels & {
"R1",
"DL1_IMAGES",
"DL1_PARAMETERS",
"DL2",
"DL1_MUON",
}
if not datalevels and not has_muons:
datalevels = (
len(
datalevels
& {
"R1",
"DL1_IMAGES",
"DL1_PARAMETERS",
"DL2",
"DL1_MUON",
}
)
> 0
)
if not any([datalevels, has_sim, has_trigger, has_muons]):
return False

return True
Expand Down
30 changes: 21 additions & 9 deletions src/ctapipe/io/hdf5tableio.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,9 @@
def close(self):
self.h5file.close()

def _add_column_to_schema(self, table_name, schema, meta, field, name, value):
def _add_column_to_schema(

Check failure on line 260 in src/ctapipe/io/hdf5tableio.py

View check run for this annotation

CTAO-DPPS-SonarQube / ctapipe Sonarqube Results

src/ctapipe/io/hdf5tableio.py#L260

Refactor this function to reduce its Cognitive Complexity from 16 to the 15 allowed.
self, table_name, schema, meta, field, name, value, time_format
):
typename = ""
shape = 1

Expand Down Expand Up @@ -302,9 +304,13 @@
schema.columns[name] = coltype(shape=shape, pos=pos)

elif isinstance(value, Time):
# TODO: really should use MET, but need a func for that
schema.columns[name] = tables.Float64Col(pos=pos)
tr = TimeColumnTransform(scale="tai", format="mjd")
tr = TimeColumnTransform(scale="tai", format=time_format)
value = tr(value)

typename = value.dtype.name
coltype = PYTABLES_TYPE_MAP[typename]
shape = value.shape
schema.columns[name] = coltype(shape=shape, pos=pos)
self.add_column_transform(table_name, name, tr)

elif type(value).__name__ in PYTABLES_TYPE_MAP:
Expand Down Expand Up @@ -341,7 +347,7 @@

return True

def _create_hdf5_table_schema(self, table_name, containers):
def _create_hdf5_table_schema(self, table_name, containers, time_format):
"""
Creates a pytables description class for the given containers
and registers it in the Writer
Expand Down Expand Up @@ -384,6 +390,7 @@
field=field,
name=col_name,
value=value,
time_format=time_format,
)
except ValueError:
self.log.warning(
Expand All @@ -396,11 +403,13 @@
meta["CTAPIPE_VERSION"] = ctapipe.__version__
return meta

def _setup_new_table(self, table_name, containers):
def _setup_new_table(self, table_name, containers, time_format):
"""set up the table. This is called the first time `write()`
is called on a new table"""
self.log.debug("Initializing table '%s' in group '%s'", table_name, self._group)
meta = self._create_hdf5_table_schema(table_name, containers)
meta = self._create_hdf5_table_schema(
table_name, containers, time_format=time_format
)

if table_name.startswith("/"):
raise ValueError("Table name must not start with '/'")
Expand Down Expand Up @@ -455,7 +464,7 @@
raise
row.append()

def write(self, table_name, containers):
def write(self, table_name, containers, time_format="ctao_high_res"):
"""
Write the contents of the given container or containers to a table.
The first call to write will create a schema and initialize the table
Expand All @@ -469,12 +478,15 @@
name of table to write to
containers: `ctapipe.core.Container` or `Iterable[ctapipe.core.Container]`
container to write
time_format: str
Format to use for storing time columns.
Either 'ctao_high_res' (the default) or a format supported by `astropy.time.Time`.
"""
if isinstance(containers, Container):
containers = (containers,)

if table_name not in self._schemas:
self._setup_new_table(table_name, containers)
self._setup_new_table(table_name, containers, time_format=time_format)

self._append_row(table_name, containers)

Expand Down
13 changes: 12 additions & 1 deletion src/ctapipe/io/tableio.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from ..core import Component
from ..instrument import SubarrayDescription
from ..time import ctao_high_res_to_time, time_to_ctao_high_res

__all__ = ["TableReader", "TableWriter"]

Expand Down Expand Up @@ -305,7 +306,7 @@ def get_meta(self, colname):


class TimeColumnTransform(ColumnTransform):
"""A Column transformation that converts astropy time objects to MJD TAI"""
"""A Column transformation that converts astropy time objects to a specific format."""

def __init__(self, scale, format):
self.scale = scale
Expand All @@ -315,9 +316,19 @@ def __call__(self, value: Time):
"""
Convert an astropy time object to the configured storage format in the configured scale
"""
# ctao_high_res is not implemented as an astropy format but using
# conversion functions defined in ctapipe
if self.format == "ctao_high_res":
return time_to_ctao_high_res(value)
# for all other formats, use astropy mechanism
return getattr(getattr(value, self.scale), self.format)

def inverse(self, value):
    """Convert stored column values back to an astropy Time object."""
    if self.format != "ctao_high_res":
        # regular astropy format: let astropy parse the stored values
        return Time(value, scale=self.scale, format=self.format, copy=COPY_IF_NEEDED)

    # ctao_high_res is not implemented as an astropy format; it is handled
    # by the conversion helpers defined in ctapipe. The stored value is a
    # pair of columns: (seconds, quarter-nanoseconds).
    seconds, quarter_ns = value[..., 0], value[..., 1]
    return ctao_high_res_to_time(seconds, quarter_ns)

def get_meta(self, colname):
Expand Down
10 changes: 2 additions & 8 deletions src/ctapipe/io/tests/test_datawriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,7 @@ def test_write(tmpdir: Path):
"""

output_path = Path(tmpdir / "events.dl1.h5")
source = EventSource(
get_dataset_path("gamma_prod5.simtel.zst"),
focal_length_choice="EQUIVALENT",
)
source = EventSource(get_dataset_path("gamma_prod5.simtel.zst"))
calibrate = CameraCalibrator(subarray=source.subarray)

with DataWriter(
Expand Down Expand Up @@ -149,10 +146,7 @@ def test_roundtrip(tmpdir: Path):
"""

output_path = Path(tmpdir / "events.DL1DL2.h5")
source = EventSource(
get_dataset_path("gamma_prod5.simtel.zst"),
focal_length_choice="EQUIVALENT",
)
source = EventSource(get_dataset_path("gamma_prod5.simtel.zst"))
calibrate = CameraCalibrator(subarray=source.subarray)

events = []
Expand Down
64 changes: 58 additions & 6 deletions src/ctapipe/io/tests/test_hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from ctapipe.io import read_table
from ctapipe.io.datalevels import DataLevel
from ctapipe.io.hdf5tableio import HDF5TableReader, HDF5TableWriter
from ctapipe.time import time_to_ctao_high_res


@pytest.fixture(scope="session")
Expand Down Expand Up @@ -658,7 +659,7 @@ class SomeContainer(Container):
"mytable", "image", FixedPointColumnTransform(100, 0, np.float64, np.int32)
)
# add user generated transform for the "value" column
writer.write("mytable", cont)
writer.write("mytable", cont, time_format="mjd")

# check that we get a length-3 array when reading back
with HDF5TableReader(tmp_file, mode="r") as reader:
Expand Down Expand Up @@ -741,7 +742,14 @@ class SomeContainer(Container):
assert data.hillas_y == 10


def test_time(tmp_path):
@pytest.mark.parametrize(
("time_format", "tolerance"),
[
("mjd", 0.1 * u.us),
("ctao_high_res", 0.01 * u.ns),
],
)
def test_time(tmp_path, time_format, tolerance):
tmp_file = tmp_path / "test_time.hdf5"

class TimeContainer(Container):
Expand All @@ -751,15 +759,16 @@ class TimeContainer(Container):
container = TimeContainer(time=time)

with HDF5TableWriter(tmp_file, group_name="data") as writer:
# write the container using the parametrized time format
writer.write("table", container)
writer.write("table", container, time_format=time_format)

with HDF5TableReader(tmp_file, mode="r") as reader:
for data in reader.read("/data/table", TimeContainer):
assert isinstance(data.time, Time)
assert data.time.scale == "tai"
assert data.time.format == "mjd"
assert (data.time - time).to(u.s).value < 1e-7
assert data.time.format == (
"unix_tai" if time_format == "ctao_high_res" else time_format
)
assert np.abs((data.time - time).to(u.s)) < tolerance


def test_filters(tmp_path):
Expand Down Expand Up @@ -1019,3 +1028,46 @@ def test_hdf5_datalevels(input_type, dl2_shower_geometry_file):
DataLevel.DL1_PARAMETERS,
DataLevel.DL2,
}


def test_column_transform_high_res_timestamp(tmp_path):
    """Round-trip times through an explicit ctao_high_res column transform.

    Verifies that sub-microsecond (quarter-nanosecond step) differences
    survive the HDF5 round-trip and that the stored two-uint32 CTAO
    representation is reproduced exactly.
    """
    from ctapipe.containers import NAN_TIME
    from ctapipe.io.tableio import TimeColumnTransform

    tmp_file = tmp_path / "test_high_res_time.hdf5"

    class SomeContainer(Container):
        default_prefix = ""
        time = Field(NAN_TIME)

    # low-resolution timestamp only has ~us precision, so test we can actually
    # store much smaller differences (quarter-nanosecond steps)
    times = Time.now() + [0, 0.25, 0.5, 0.75, 1.0, 1.25] * u.ns

    with HDF5TableWriter(tmp_file) as writer:
        writer.add_column_transform(
            "events", "time", TimeColumnTransform("tai", "ctao_high_res")
        )

        for t in times:
            writer.write("events", SomeContainer(time=t))

    # read the timestamps back and check they survived the round-trip
    with HDF5TableReader(tmp_file, mode="r") as reader:
        times_read = [c.time for c in reader.read("/events", SomeContainer)]

    assert isinstance(times_read[0], Time)
    assert times_read[0].scale == "tai"
    assert times_read[0].format == "unix_tai"

    # convert list of scalar times to a 1d Time object
    times_read = Time(times_read)
    # check absolute error due to floating point imprecision stays well
    # below the 0.25 ns quantization step
    roundtrip_error = (times - times_read).to_value(u.ns)
    np.testing.assert_array_less(np.abs(roundtrip_error), 0.01)

    # check that times are exactly equal in ctao representation
    np.testing.assert_array_equal(
        time_to_ctao_high_res(times),
        time_to_ctao_high_res(times_read),
    )
2 changes: 1 addition & 1 deletion src/ctapipe/io/tests/test_hdf5eventsource.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_is_compatible(compatible_file, request):
def test_metadata(dl1_file):
with HDF5EventSource(input_url=dl1_file) as source:
assert source.is_simulation
assert source.datamodel_version == (6, 0, 0)
assert source.datamodel_version == (7, 0, 0)
assert set(source.datalevels) == {
DataLevel.DL1_IMAGES,
DataLevel.DL1_PARAMETERS,
Expand Down
Loading