Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 64 additions & 22 deletions src/kbmod/standardizers/butler_standardizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,9 @@ class ButlerStandardizer(Standardizer):
----------
tgt : `lsst.daf.butler.core.DatasetId`, `lsst.daf.butler.core.DatasetRef` or `int`
Target to standardize.
butler : `lsst.daf.butler.Butler`
Vera C. Rubin Data Butler.
butler : `lsst.daf.butler.Butler` or `list[lsst.daf.butler.Butler]`
Vera C. Rubin Data Butler or a list of butlers. The butlers are queried
to resolve the given target in the given order.
config : `StandardizerConfig`, `dict` or `None`, optional
Configuration key-values used when standardizing the file.

Expand Down Expand Up @@ -181,15 +182,73 @@ def resolveTarget(self, tgt):

return False

def __init__(self, dataId, butler, config=None, **kwargs):
@classmethod
def __query_butler(self, tgt, butler):
"""Given a target and a butler, which might not contain the target
queries the butler to resolve it. Butler failures are silenced.

Has to be called after deffered_import.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: deferred_import


Parameters
----------
tgt : `lsst.daf.butler.core.DatasetId`, `lsst.daf.butler.core.DatasetRef` or `int`
Target to standardize.
butler : `lsst.daf.butler.Butler` or `list[lsst.daf.butler.Butler]`
Vera C. Rubin Data Butler or a list of butlers. The butlers are queried
to resolve the given target in the given order.

Raises
------
TypeError : When given target is not a DatasetRef, DatasetId, or unique integer ID"
"""
# including records expands the dataId to include
# key pieces of information such as filter and band
# loading datastore_records could be a shortcut to
# relative path inside the repository
if isinstance(tgt, dafButler.DatasetRef):
ref = tgt
elif isinstance(tgt, dafButler.DatasetId):
ref = butler.get_dataset(tgt, dimension_records=True)
elif isinstance(tgt, (uuid.UUID, str)):
did = dafButler.DatasetId(tgt)
ref = butler.get_dataset(did, dimension_records=True)
else:
raise TypeError("Expected DatasetRef, DatasetId or an unique integer ID, " f"got {tgt} instead.")

return ref, butler

def __init__(self, tgt, butler, config=None, **kwargs):
deferred_import("lsst.daf.butler", "dafButler")

# Sometimes we find ourselves having to process data that is
# in the process of migration between multiple repositories.
# We want to prioritize one of these repos as the preffered
# source of data, but it does not yet contain all data.
# So we check all the given butlers to resolve a target in
# order and then skip once we get a hit. To cover the more
# the plain single-butler use case just promote it to a list.
if isinstance(butler, dafButler.Butler):
butlers = [
butler,
]
else:
butlers = butler
Comment on lines +234 to +235
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By renaming the argument from butler to butlers, we can get rid of this else branch.


for b in butlers:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would consider instead something like

        for b in butlers:
            self.ref = self.__query_butler(tgt, b)
            if self.ref is not None:
                # Use the first butler we find our reference in
                self.butler = b
                break

        if self.ref is None:
            raise ValueError(f"Unable to resolve target {tgt} for any butler.")

This helps us get rid of the butler local varible

ref, butler = self.__query_butler(tgt, b)
if ref is not None:
continue

if ref is None:
raise

# Now that target was upgraded to a ref and the correct butler
# is know we can get the info we need from them.
# Somewhere around w_2024_ builds the datastore.root
# was removed as an attribute of the datastore, not sure
# it was ever replaced with anything back-compatible. We simply
# check for the which _datastore attribute is available for this
# butler and then check wherther it has a root or roots attribute.

if hasattr(butler, "datastore"):
datastore_root = butler.datastore.root
elif hasattr(butler, "_datastore"):
Expand All @@ -203,26 +262,9 @@ def __init__(self, dataId, butler, config=None, **kwargs):
raise AttributeError("Butler does not have a valid datastore attribute.")

super().__init__(str(datastore_root), config=config)

self.ref = ref
self.butler = butler

# including records expands the dataId to include
# key pieces of information such as filter and band
# loading datastore_records could be a shortcut to
# relative path inside the repository
if isinstance(dataId, dafButler.DatasetRef):
ref = dataId
elif isinstance(dataId, dafButler.DatasetId):
ref = butler.get_dataset(dataId, dimension_records=True)
elif isinstance(dataId, (uuid.UUID, str)):
did = dafButler.DatasetId(dataId)
ref = butler.get_dataset(did, dimension_records=True)
else:
raise TypeError(
"Expected DatasetRef, DatasetId or an unique integer ID, " f"got {dataId} instead."
)

self.ref = ref
self.exp = None
self.processable = [self.exp]

Expand Down
26 changes: 17 additions & 9 deletions tests/test_butlerstd.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from astropy.time import Time
import numpy as np

from utils import DECamImdiffFactory, MockButler, DatasetRef, DatasetId, dafButler
from utils import DECamImdiffFactory, MockButler, MockFailedButler, DatasetRef, DatasetId, dafButler
from kbmod import Standardizer, StandardizerConfig
from kbmod.core.psf import PSF
from kbmod.standardizers import ButlerStandardizer, ButlerStandardizerConfig, KBMODV1Config
Expand All @@ -28,6 +28,7 @@ class TestButlerStandardizer(unittest.TestCase):

def setUp(self):
self.butler = MockButler("/far/far/away")
self.failed_butler = MockFailedButler("futher/still")

def test_init(self):
"""Test ButlerStandardizer can be built from DatasetRef, DatasetId and
Expand All @@ -44,15 +45,11 @@ def test_init(self):

_ = Standardizer.get(DatasetId(6), butler=self.butler, force=ButlerStandardizer)

def test_standardize(self):
"""Test ButlerStandardizer instantiates and standardizes as expected."""
std = Standardizer.get(DatasetId(7, fill_metadata=True), butler=self.butler)
standardized = std.standardize()

fits = FitsFactory.get_fits(7, spoof_data=True)
def compare_to_expected(self, expected_idx, standardized):
fits = FitsFactory.get_fits(expected_idx, spoof_data=True)
hdr = fits["PRIMARY"].header
expected = {
"dataId": "7",
"dataId": f"{expected_idx}",
"datasetType": "test_datasettype_name",
"visit": int(f"{hdr['EXPNUM']}{hdr['CCDNUM']}"),
"detector": hdr["CCDNUM"],
Expand Down Expand Up @@ -88,6 +85,17 @@ def test_standardize(self):
np.testing.assert_equal([fits["MASK"].data,], standardized["mask"])
# fmt: on

def test_standardize(self):
"""Test ButlerStandardizer instantiates and standardizes as expected."""
std = Standardizer.get(DatasetId(7, fill_metadata=True), butler=self.butler)
standardized = std.standardize()
self.compare_to_expected(7, standardized)

# Test chained resolution works.
std = Standardizer.get(DatasetId(7, fill_metadata=True), butler=[self.failed_butler, self.butler])
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we could add some small tests that

  1. butler=[self.failed_butler, self.failed_butler] throws an error.
  2. For butler=[self.butler2, self.butler] we could check that std.butler is self.butler2

standardized = std.standardize()
self.compare_to_expected(7, standardized)

def test_standardize_missing_wcs(self):
"""Test ButlerStandardizer instantiates and standardizes as expected een when fits appoximation of the WCS failed."""
missing_wcs_butler = MockButler("/far/far/away", failed_fits_appoximation=True)
Expand Down Expand Up @@ -167,7 +175,7 @@ def test_roundtrip(self):
std = Standardizer.get(DatasetId(8), butler=self.butler)
standardized = std.standardize()

std2 = ButlerStandardizer(**standardized["meta"], butler=self.butler)
std2 = ButlerStandardizer(standardized["meta"]["dataId"], butler=self.butler)
self.assertIsInstance(std2, ButlerStandardizer)

standardized2 = std2.standardize()
Expand Down
33 changes: 33 additions & 0 deletions tests/utils/mock_butler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

__all__ = [
"MockButler",
"MockFailedButler",
"Registry",
"Datastore",
"DatasetRef",
Expand Down Expand Up @@ -493,6 +494,38 @@ def mock_exposure(self, ref):
return mocked


class MockFailedButler:
"""Mocked Vera C. Rubin Data Butler that always fails to resolve
any reference.

Keep the same signature for init and the rest of methods, just
fail in a way that mimics the butler instead of resolving it.
"""

def __init__(
self,
root,
ref=None,
mock_images_f=None,
registry=None,
missing_headers=[],
failed_fits_appoximation=False,
):
pass

def getURI(self, ref, dataId=None, collections=None):
return None

def getDataset(self, datid):
return None

def get_dataset(self, datid, dimension_records=False, datastore_records=False):
return None

def get(self, ref, collections=None, dataId=None):
return None


class dafButler:
"""Intercepts calls ``import lsst.daf.butler as dafButler`` and shortcuts
them to our mocks.
Expand Down