-
Notifications
You must be signed in to change notification settings - Fork 19
Add butler chaining to ButlerStandardizer #1130
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -135,8 +135,9 @@ class ButlerStandardizer(Standardizer): | |
| ---------- | ||
| tgt : `lsst.daf.butler.core.DatasetId`, `lsst.daf.butler.core.DatasetRef` or `int` | ||
| Target to standardize. | ||
| butler : `lsst.daf.butler.Butler` | ||
| Vera C. Rubin Data Butler. | ||
| butler : `lsst.daf.butler.Butler` or `list[lsst.daf.butler.Butler]` | ||
| Vera C. Rubin Data Butler or a list of butlers. The butlers are queried | ||
| to resolve the given target in the given order. | ||
| config : `StandardizerConfig`, `dict` or `None`, optional | ||
| Configuration key-values used when standardizing the file. | ||
|
|
||
|
|
@@ -181,15 +182,73 @@ def resolveTarget(self, tgt): | |
|
|
||
| return False | ||
|
|
||
| def __init__(self, dataId, butler, config=None, **kwargs): | ||
| @classmethod | ||
| def __query_butler(self, tgt, butler): | ||
| """Given a target and a butler, which might not contain the target | ||
| queries the butler to resolve it. Butler failures are silenced. | ||
|
|
||
| Has to be called after deferred_import. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| tgt : `lsst.daf.butler.core.DatasetId`, `lsst.daf.butler.core.DatasetRef` or `int` | ||
| Target to standardize. | ||
| butler : `lsst.daf.butler.Butler` or `list[lsst.daf.butler.Butler]` | ||
| Vera C. Rubin Data Butler or a list of butlers. The butlers are queried | ||
| to resolve the given target in the given order. | ||
|
|
||
| Raises | ||
| ------ | ||
| TypeError : When given target is not a DatasetRef, DatasetId, or unique integer ID | ||
| """ | ||
| # including records expands the dataId to include | ||
| # key pieces of information such as filter and band | ||
| # loading datastore_records could be a shortcut to | ||
| # relative path inside the repository | ||
| if isinstance(tgt, dafButler.DatasetRef): | ||
| ref = tgt | ||
| elif isinstance(tgt, dafButler.DatasetId): | ||
| ref = butler.get_dataset(tgt, dimension_records=True) | ||
| elif isinstance(tgt, (uuid.UUID, str)): | ||
| did = dafButler.DatasetId(tgt) | ||
| ref = butler.get_dataset(did, dimension_records=True) | ||
| else: | ||
| raise TypeError("Expected DatasetRef, DatasetId or an unique integer ID, " f"got {tgt} instead.") | ||
|
|
||
| return ref, butler | ||
|
|
||
| def __init__(self, tgt, butler, config=None, **kwargs): | ||
| deferred_import("lsst.daf.butler", "dafButler") | ||
|
|
||
| # Sometimes we find ourselves having to process data that is | ||
| # in the process of migration between multiple repositories. | ||
| # We want to prioritize one of these repos as the preferred | ||
| # source of data, but it does not yet contain all data. | ||
| # So we check all the given butlers to resolve a target in | ||
| # order and then skip once we get a hit. To cover | ||
| # the plain single-butler use case just promote it to a list. | ||
| if isinstance(butler, dafButler.Butler): | ||
| butlers = [ | ||
| butler, | ||
| ] | ||
| else: | ||
| butlers = butler | ||
|
Comment on lines
+234
to
+235
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. By renaming the argument from butler to butlers, we can get rid of this else branch. |
||
|
|
||
| for b in butlers: | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would consider instead something like This helps us get rid of the |
||
| ref, butler = self.__query_butler(tgt, b) | ||
| if ref is not None: | ||
| continue | ||
|
|
||
| if ref is None: | ||
| raise | ||
|
|
||
| # Now that target was upgraded to a ref and the correct butler | ||
| # is known we can get the info we need from them. | ||
| # Somewhere around w_2024_ builds the datastore.root | ||
| # was removed as an attribute of the datastore, not sure | ||
| # it was ever replaced with anything back-compatible. We simply | ||
| # check which _datastore attribute is available for this | ||
| # butler and then check whether it has a root or roots attribute. | ||
|
|
||
| if hasattr(butler, "datastore"): | ||
| datastore_root = butler.datastore.root | ||
| elif hasattr(butler, "_datastore"): | ||
|
|
@@ -203,26 +262,9 @@ def __init__(self, dataId, butler, config=None, **kwargs): | |
| raise AttributeError("Butler does not have a valid datastore attribute.") | ||
|
|
||
| super().__init__(str(datastore_root), config=config) | ||
|
|
||
| self.ref = ref | ||
| self.butler = butler | ||
|
|
||
| # including records expands the dataId to include | ||
| # key pieces of information such as filter and band | ||
| # loading datastore_records could be a shortcut to | ||
| # relative path inside the repository | ||
| if isinstance(dataId, dafButler.DatasetRef): | ||
| ref = dataId | ||
| elif isinstance(dataId, dafButler.DatasetId): | ||
| ref = butler.get_dataset(dataId, dimension_records=True) | ||
| elif isinstance(dataId, (uuid.UUID, str)): | ||
| did = dafButler.DatasetId(dataId) | ||
| ref = butler.get_dataset(did, dimension_records=True) | ||
| else: | ||
| raise TypeError( | ||
| "Expected DatasetRef, DatasetId or an unique integer ID, " f"got {dataId} instead." | ||
| ) | ||
|
|
||
| self.ref = ref | ||
| self.exp = None | ||
| self.processable = [self.exp] | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,7 +5,7 @@ | |
| from astropy.time import Time | ||
| import numpy as np | ||
|
|
||
| from utils import DECamImdiffFactory, MockButler, DatasetRef, DatasetId, dafButler | ||
| from utils import DECamImdiffFactory, MockButler, MockFailedButler, DatasetRef, DatasetId, dafButler | ||
| from kbmod import Standardizer, StandardizerConfig | ||
| from kbmod.core.psf import PSF | ||
| from kbmod.standardizers import ButlerStandardizer, ButlerStandardizerConfig, KBMODV1Config | ||
|
|
@@ -28,6 +28,7 @@ class TestButlerStandardizer(unittest.TestCase): | |
|
|
||
| def setUp(self): | ||
| self.butler = MockButler("/far/far/away") | ||
| self.failed_butler = MockFailedButler("futher/still") | ||
|
|
||
| def test_init(self): | ||
| """Test ButlerStandardizer can be built from DatasetRef, DatasetId and | ||
|
|
@@ -44,15 +45,11 @@ def test_init(self): | |
|
|
||
| _ = Standardizer.get(DatasetId(6), butler=self.butler, force=ButlerStandardizer) | ||
|
|
||
| def test_standardize(self): | ||
| """Test ButlerStandardizer instantiates and standardizes as expected.""" | ||
| std = Standardizer.get(DatasetId(7, fill_metadata=True), butler=self.butler) | ||
| standardized = std.standardize() | ||
|
|
||
| fits = FitsFactory.get_fits(7, spoof_data=True) | ||
| def compare_to_expected(self, expected_idx, standardized): | ||
| fits = FitsFactory.get_fits(expected_idx, spoof_data=True) | ||
| hdr = fits["PRIMARY"].header | ||
| expected = { | ||
| "dataId": "7", | ||
| "dataId": f"{expected_idx}", | ||
| "datasetType": "test_datasettype_name", | ||
| "visit": int(f"{hdr['EXPNUM']}{hdr['CCDNUM']}"), | ||
| "detector": hdr["CCDNUM"], | ||
|
|
@@ -88,6 +85,17 @@ def test_standardize(self): | |
| np.testing.assert_equal([fits["MASK"].data,], standardized["mask"]) | ||
| # fmt: on | ||
|
|
||
| def test_standardize(self): | ||
| """Test ButlerStandardizer instantiates and standardizes as expected.""" | ||
| std = Standardizer.get(DatasetId(7, fill_metadata=True), butler=self.butler) | ||
| standardized = std.standardize() | ||
| self.compare_to_expected(7, standardized) | ||
|
|
||
| # Test chained resolution works. | ||
| std = Standardizer.get(DatasetId(7, fill_metadata=True), butler=[self.failed_butler, self.butler]) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we could add some small tests that
|
||
| standardized = std.standardize() | ||
| self.compare_to_expected(7, standardized) | ||
|
|
||
| def test_standardize_missing_wcs(self): | ||
| """Test ButlerStandardizer instantiates and standardizes as expected een when fits appoximation of the WCS failed.""" | ||
| missing_wcs_butler = MockButler("/far/far/away", failed_fits_appoximation=True) | ||
|
|
@@ -167,7 +175,7 @@ def test_roundtrip(self): | |
| std = Standardizer.get(DatasetId(8), butler=self.butler) | ||
| standardized = std.standardize() | ||
|
|
||
| std2 = ButlerStandardizer(**standardized["meta"], butler=self.butler) | ||
| std2 = ButlerStandardizer(standardized["meta"]["dataId"], butler=self.butler) | ||
| self.assertIsInstance(std2, ButlerStandardizer) | ||
|
|
||
| standardized2 = std2.standardize() | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: deferred_import