Skip to content

Commit 2cc47c4

Browse files
authored
Merge pull request #45 from BAMresearch/BaseData-modification
Base data modification: simplified by removing the scalar and offset, as they made universal maths very complex. Pipeline processes can work on multiple scalar BaseData operations before applying them to the main BaseData arrays. No dunder math methods have been implemented here yet; I'm testing various uncertainty-handling approaches, so for now we will leave the implementation to the processing modules themselves. auto_uncertainties is a strong contender at the moment.
2 parents 78e4156 + 59a4b65 commit 2cc47c4

File tree

14 files changed

+729
-189
lines changed

14 files changed

+729
-189
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ develop-eggs
2121
lib
2222
lib64
2323
venv*/
24-
.venv
24+
.venv*
2525
pyvenv*/
2626
pip-wheel-metadata/
2727

docs/full_flow.drawio

Lines changed: 44 additions & 38 deletions
Large diffs are not rendered by default.

src/modacor/dataclasses/basedata.py

Lines changed: 274 additions & 71 deletions
Large diffs are not rendered by default.
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# /usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
4+
from __future__ import annotations
5+
6+
__coding__ = "utf-8"
7+
__author__ = "Brian R. Pauw"
8+
__license__ = "BSD3"
9+
__copyright__ = "Copyright 2025, The MoDaCor team"
10+
__date__ = "24/05/2025"
11+
__version__ = "20250524.1"
12+
__status__ = "Development" # "Development", "Production"
13+
from typing import Any, Dict
14+
15+
import numpy as np
16+
import pint
17+
from attrs import define, field
18+
from attrs import validators as v
19+
20+
from modacor import ureg
21+
from modacor.administration.licenses import BSD3Clause as __license__ # noqa: F401
22+
23+
from .validators import arrays_are_equal_shape
24+
25+
# end of header and standard imports
26+
27+
28+
@define
class SourceData:
    """
    SourceData is used for a data value or array loaded from an IoSource.

    ``value``, ``units`` and ``variance`` have no defaults and must always be
    supplied; ``attributes`` is optional.

    Attributes
    ----------
    value : np.ndarray
        The loaded data.
    units : pint.Unit
        Units of ``value``; must be a unit of the MoDaCor ``ureg`` registry.
    variance : np.ndarray
        Variance belonging to ``value``; must have the same shape as ``value``.
    attributes : Dict[str, Any]
        Free-form additional metadata, an empty dict when not given.

    Raises
    ------
    TypeError
        If ``value`` or ``variance`` is not a numpy array, or ``units`` is not
        a ``ureg.Unit`` (raised by the attrs ``instance_of`` validators).
    ValueError
        If ``value`` and ``variance`` differ in shape.
    """

    # value, units and variance are required
    value: np.ndarray = field(validator=v.instance_of(np.ndarray))
    units: pint.Unit = field(validator=v.instance_of(ureg.Unit))
    # variance is an array (it is shape-checked against value below), not a unit
    variance: np.ndarray = field(validator=v.instance_of(np.ndarray))
    attributes: Dict[str, Any] = field(factory=dict)

    def __attrs_post_init__(self):
        # the data field is named ``value``; there is no ``data`` attribute
        if not arrays_are_equal_shape(self.value, self.variance):
            raise ValueError("Data and variance arrays must have the same shape.")

src/modacor/dataclasses/validators.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from numbers import Integral
66
from typing import Any, Type
77

8+
import numpy as np
9+
810
from modacor import ureg
911

1012
from .databundle import DataBundle
@@ -49,6 +51,18 @@ def check_data(
4951
return True
5052

5153

54+
def arrays_are_equal_shape(
    array1: np.ndarray,
    array2: np.ndarray,
) -> bool:
    """
    Tell whether two arrays share exactly the same shape.

    Parameters
    ----------
    array1, array2 : np.ndarray
        The arrays whose ``shape`` attributes are compared.

    Returns
    -------
    bool
        True when both shapes are identical, False otherwise.
    """
    return array1.shape == array2.shape
64+
65+
5266
def check_data_element_and_units(
5367
data: DataBundle,
5468
data_element_name: str,

src/modacor/io/hdf/hdf_loader.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,17 @@
3131
from os.path import abspath
3232

3333
import h5py
34-
import numpy as np
3534

36-
from modacor.dataclasses.messagehandler import *
35+
from modacor.dataclasses.basedata import BaseData
36+
from modacor.dataclasses.messagehandler import MessageHandler
3737

3838
from ..io_source import IoSource
39-
from ..io_sources import IoSources
4039

4140

4241
class HDFLoader(IoSource):
43-
def __init__(self, source_reference: str, logging_level = WARNING):
42+
def __init__(self, source_reference: str, logging_level=WARNING):
4443
super().__init__(source_reference)
45-
self.hdf_logger = MessageHandler(level = logging_level, name = 'hdf5logger')
44+
self.hdf_logger = MessageHandler(level=logging_level, name="hdf5logger")
4645
self._file_path = None
4746
self._file_reference = None
4847
self._file_datasets = []
@@ -74,7 +73,20 @@ def _close_file(self):
7473
raise OSError(error)
7574

7675
def _find_datasets(self, path_name, path_object):
    """
    Internal callback used while walking the tree of an HDF5 file; collects
    the datasets found.

    For every ``path_name`` that resolves to a dataset in the open file, the
    path is appended to ``self._file_datasets`` and its shape is stored in
    ``self._file_datasets_shapes``. Anything that is not a dataset is ignored.

    ``path_object`` is accepted but not used; the node is looked up through
    ``self._file_reference`` instead.
    """
    node = self._file_reference[path_name]
    if not isinstance(node, h5py._hl.dataset.Dataset):
        return
    self._file_datasets.append(path_name)
    self._file_datasets_shapes[path_name] = node.shape
83+
84+
def get_data(self, data_key: str) -> BaseData:
    """
    Placeholder: data retrieval is not available for HDFLoader yet.

    Raises
    ------
    NotImplementedError
        Always.
    """
    message = "get_data method not yet implemented in HDFLoader class."
    raise NotImplementedError(message)
86+
87+
def get_static_metadata(self, data_key):
    """
    Placeholder: static metadata retrieval is not available for HDFLoader yet.

    Raises
    ------
    NotImplementedError
        Always.
    """
    message = "get_static_metadata method not yet implemented in HDFLoader class."
    raise NotImplementedError(message)

src/modacor/io/io_source.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,10 @@
3030

3131
from typing import Any
3232

33-
import numpy as np
3433
from attrs import define, field
3534

35+
from modacor.dataclasses.source_data import SourceData
36+
3637

3738
def default_config() -> dict[str, Any]:
3839
"""
@@ -77,9 +78,9 @@ class IoSource:
7778

7879
configuration: dict[str, Any] = field(factory=default_config)
7980

80-
def get_data(self, index: int, data_key: str) -> np.ndarray:
81+
def get_data(self, index: int, data_key: str) -> SourceData:
8182
"""
82-
Get data from the IO source using the provided data key.
83+
Get data and metadata from the IO source using the provided data key.
8384
8485
Parameters
8586
----------
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# SPDX-License-Identifier: BSD-3-Clause
2+
# Copyright 2025 MoDaCor Authors
3+
#
4+
# Redistribution and use in source and binary forms, with or without modification,
5+
# are permitted provided that the following conditions are met:
6+
# 1. Redistributions of source code must retain the above copyright notice, this
7+
# list of conditions and the following disclaimer.
8+
# 2. Redistributions in binary form must reproduce the above copyright notice,
9+
# this list of conditions and the following disclaimer in the documentation
10+
# and/or other materials provided with the distribution.
11+
# 3. Neither the name of the copyright holder nor the names of its contributors
12+
# may be used to endorse or promote products derived from this software without
13+
# specific prior written permission.
14+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND
15+
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16+
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
18+
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19+
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
20+
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
21+
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
23+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24+
25+
__license__ = "BSD-3-Clause"
26+
__copyright__ = "Copyright 2025 MoDaCor Authors"
27+
__status__ = "Alpha"
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# /usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
4+
from __future__ import annotations
5+
6+
from pathlib import Path
7+
from typing import Any
8+
9+
import yaml
10+
11+
from modacor.dataclasses.source_data import SourceData
12+
13+
__coding__ = "utf-8"
14+
__author__ = "Brian R. Pauw"
15+
__license__ = "BSD3"
16+
__copyright__ = "Copyright 2025, The MoDaCor team"
17+
__date__ = "24/05/2025"
18+
__version__ = "20250524.1"
19+
__status__ = "Development" # "Development", "Production"
20+
from logging import WARNING
21+
22+
import h5py
23+
import numpy as np
24+
25+
from modacor.administration.licenses import BSD3Clause as __license__ # noqa: F401
26+
from modacor.dataclasses.messagehandler import MessageHandler
27+
28+
from ..io_source import IoSource
29+
30+
# end of header and standard imports
31+
32+
33+
class StaticMetadata(IoSource):
    """
    This IoSource is used to load and make experiment metadata available to
    the processing pipeline modules.

    It can be filled in with information such as wavelength, geometry and
    other relevant information which is needed in multiple processing steps.
    The metadata can be loaded from a YAML file with mappings.

    Mapping entries that carry 'value', 'units' and 'variance' are stored as
    SourceData elements and returned by ``get_data``; non-mapping entries
    (such as strings and tags) are stored as-is and returned by
    ``get_static_metadata``.
    """

    _data_cache: dict[str, SourceData] = None
    _static_metadata_cache: dict[str, Any] = None

    def __init__(self, source_reference: str, logging_level=WARNING):
        super().__init__(source_reference)
        self.logger = MessageHandler(level=logging_level, name="StaticMetadata")
        self._data_cache = {}  # for values with units and uncertainties
        self._static_metadata_cache = {}  # for other elements such as strings and tags

    def _load_from_yaml(self, file_path: Path) -> None:
        """
        Load static metadata from a YAML file into the two caches.

        Parameters
        ----------
        file_path : Path
            Location of the YAML file; its top level is expected to be a mapping.

        Raises
        ------
        FileNotFoundError
            If ``file_path`` does not exist.
        """
        # function-scope import keeps this block self-contained; the unit
        # registry is needed because SourceData validates units as ureg.Unit
        from modacor import ureg

        # explicit check instead of ``assert``, which is stripped under ``python -O``
        if not file_path.exists():
            raise FileNotFoundError(f"Static metadata file {file_path} does not exist.")
        with open(file_path, "r", encoding="utf-8") as f:
            # safe_load returns None for an empty document
            data = yaml.safe_load(f) or {}

        required_keys = ("value", "units", "variance")
        for key, entry in data.items():
            if not isinstance(entry, dict):
                # Store other metadata as static metadata
                self._static_metadata_cache[key] = entry
                continue
            if not all(k in entry for k in required_keys):
                # invalid entry: log it and carry on with the remaining entries
                self.logger.error(
                    f"Invalid entry for key '{key}': {entry}. Expected 'value', 'units', and 'variance'."
                )
                continue
            # the required keys are popped so that whatever is left in the
            # mapping becomes the free-form attributes of the SourceData
            self._data_cache[key] = SourceData(
                value=np.array(entry.pop("value")),
                # YAML yields a unit string; convert it to a registry unit
                units=ureg.Unit(entry.pop("units")),
                variance=np.array(entry.pop("variance")),
                attributes=entry,
            )

    def get_static_metadata(self, data_key: str) -> Any:
        """
        Return the static (non-SourceData) metadata stored under ``data_key``.

        Logs an error and returns None when the key is not in the cache.
        """
        if data_key not in self._static_metadata_cache:
            self.logger.error(f"Static metadata key '{data_key}' not in cache.")
            return None

        return self._static_metadata_cache[data_key]

    def get_data(self, data_key: str) -> SourceData | None:
        """
        Return the SourceData stored under ``data_key`` in the data cache.

        Logs an error and returns None when the key is not in the cache.
        """
        if data_key not in self._data_cache:
            self.logger.error(f"Data key '{data_key}' not in static metadata cache.")
            return None

        return self._data_cache[data_key]

src/modacor/modules/base_modules/multiply_by_variable.py renamed to src/modacor/modules/base_modules/multiply_by_scalar.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from modacor.math.variance_calculations import divide
1818

1919

20-
class MultiplyByVariable(ProcessStep):
20+
class MultiplyByScalar(ProcessStep):
2121
"""
2222
Adding Poisson uncertainties to the data
2323
"""

0 commit comments

Comments
 (0)