Commit a31efe6

Use the new Butler query system in _filter_datasets for region query
The new Butler query system supports spatial-constraint queries via lsst.sphgeom.Region directly. With this change, we use it in the template and refcat searches. This requires stack w_2024_38 or newer. make_export.py uses _filter_datasets, so it is adjusted to the new underlying API as well.
1 parent f38bcf6 commit a31efe6
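
For illustration, a minimal sketch of the spatial-constraint query style this commit adopts, assuming stack w_2024_38 or newer. The repo path, collection, and dataset type names below are hypothetical placeholders, not values from this commit:

import lsst.sphgeom
from lsst.daf.butler import Butler

butler = Butler("/repo/main")  # hypothetical repo

# Any lsst.sphgeom.Region can serve as the spatial constraint;
# here, a 0.5-degree circle on the sky.
center = lsst.sphgeom.LonLat.fromDegrees(53.1, -27.8)
region = lsst.sphgeom.Circle(lsst.sphgeom.UnitVector3d(center),
                             lsst.sphgeom.Angle.fromDegrees(0.5))

# The new query system accepts the region directly via ``bind``;
# no manual HTM shard computation is needed.
refs = butler.query_datasets(
    "gaia_dr3_20230707",        # hypothetical refcat dataset type
    collections="refcats",      # hypothetical collection
    where="htm7.region OVERLAPS search_region",
    bind={"search_region": region},
    find_first=True,
    explain=False,              # empty results are acceptable here
)

This is the same where/bind pattern that _filter_datasets now forwards to query_datasets in both the source and destination repos.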

2 files changed: 38 additions & 43 deletions
python/activator/middleware_interface.py

Lines changed: 23 additions & 28 deletions
@@ -42,10 +42,9 @@
 import lsst.ctrl.mpexec
 from lsst.ctrl.mpexec import SeparablePipelineExecutor, SingleQuantumExecutor, MPGraphExecutor
 from lsst.daf.butler import Butler, CollectionType, DatasetType, Timespan
-from lsst.daf.butler.registry import MissingDatasetTypeError
+from lsst.daf.butler import DataIdValueError, MissingDatasetTypeError
 import lsst.dax.apdb
 import lsst.geom
-from lsst.meas.algorithms.htmIndexer import HtmIndexer
 import lsst.obs.base
 import lsst.pipe.base
 import lsst.analysis.tools
@@ -624,21 +623,16 @@ def _export_refcats(self, region):
         refcats : iterable [`DatasetRef`]
             The refcats to be exported, after any filtering.
         """
-        center = lsst.geom.SpherePoint(region.getCentroid())
-        radius = max([center.separation(lsst.geom.SpherePoint(vertex)) for vertex in region.getVertices()])
-        indexer = HtmIndexer(depth=7)
-        shard_ids, _ = indexer.getShardIds(center, radius)
-        htm_where = f"htm7 in ({','.join(str(x) for x in shard_ids)})"
         # Get shards from all refcats that overlap this region.
         possible_refcats = _get_refcat_types(self.central_butler)
-        _log.debug("Searching for refcats of types %s in %s...",
-                   {t.name for t in possible_refcats}, shard_ids)
+        _log.debug("Searching for refcats of types %s.", {t.name for t in possible_refcats})
         refcats = set(_filter_datasets(
             self.central_butler, self.butler,
             possible_refcats,
             collections=self.instrument.makeRefCatCollectionName(),
-            where=htm_where,
-            findFirst=True,
+            where="htm7.region OVERLAPS search_region",
+            bind={"search_region": region},
+            find_first=True,
             all_callback=self._mark_dataset_usage,
         ))
         if refcats:
@@ -668,16 +662,11 @@ def _export_skymap_and_templates(self, region, filter):
             ["skyMap"],
             skymap=self.skymap_name,
             collections=self._collection_skymap,
-            findFirst=True,
+            find_first=True,
             all_callback=self._mark_dataset_usage,
         ))
         _log.debug("Found %d new skymap datasets.", len(skymaps))

-        # htm7 is too coarse and many more patches than necessary would be selected.
-        # But searching Butler with htm higher level does not work.
-        # TODO: This will be replaced by the new spatial query feature in Butler.
-        template_where = " OR ".join([f"htm7 in ({range[0]}..{range[1]})"
-                                      for range in lsst.sphgeom.HtmPixelization(7).interior(region).ranges()])
         try:
             _log.debug("Searching for templates.")
             templates = set(_filter_datasets(
@@ -687,8 +676,9 @@ def _export_skymap_and_templates(self, region, filter):
                 instrument=self.instrument.getName(),
                 skymap=self.skymap_name,
                 physical_filter=filter,
-                where=template_where,
-                findFirst=True,
+                where="patch.region OVERLAPS search_region",
+                bind={"search_region": region},
+                find_first=True,
                 all_callback=self._mark_dataset_usage,
             ))
         except _MissingDatasetError as err:
@@ -719,8 +709,6 @@ def _export_calibs(self, detector_id, filter):
         # Some calibs have an exposure ID (of the source dataset?), but these can't be used in AP.
         type_names = {t.name for t in self.central_butler.registry.queryDatasetTypes()
                       if t.isCalibration() and "exposure" not in t.dimensions}
-        # TODO: we can't use findFirst=True yet because findFirst query
-        # in CALIBRATION-type collection is not supported currently.
         # For now, filter down to the dataset types that exist in the specific calib collection.
         # TODO: A new query API after DM-45873 may replace or improve this usage.
         # TODO: DM-40245 to identify the datasets.
@@ -734,6 +722,7 @@ def _export_calibs(self, detector_id, filter):
             instrument=self.instrument.getName(),
             detector=detector_id,
             physical_filter=filter,
+            find_first=True,
             calib_date=calib_date,
             all_callback=self._mark_dataset_usage,
         ))
@@ -761,7 +750,7 @@ def _export_ml_models(self):
                 self.central_butler, self.butler,
                 ["pretrainedModelPackage"],
                 collections=self._collection_ml_model,
-                findFirst=True,
+                find_first=True,
                 all_callback=self._mark_dataset_usage,
             ))
         except _MissingDatasetError as err:
@@ -1750,8 +1739,7 @@ def _filter_datasets(src_repo: Butler,
     """Identify datasets in a source repository, filtering out those already
     present in a destination.

-    Unlike Butler or database queries, this method raises if nothing in the
-    source repository matches the query criteria.
+    This method raises if nothing in the source repository matches the query criteria.

     Parameters
     ----------
@@ -1770,7 +1758,7 @@ def _filter_datasets(src_repo: Butler,
         This callable is not called if the query returns no results.
     *args, **kwargs
         Parameters for describing the dataset query. They have the same
-        meanings as the parameters of `lsst.daf.butler.Registry.queryDatasets`.
+        meanings as the parameters of `lsst.daf.butler.query_datasets`.
         The query must be valid for both ``src_repo`` and ``dest_repo``.

     Returns
@@ -1798,8 +1786,12 @@ def _filter_datasets(src_repo: Butler,
     known_datasets = set()
     for dataset_type in dataset_types:
         try:
-            subset = set(dest_repo.registry.queryDatasets(dataset_type, *args, **kwargs))
-        except lsst.daf.butler.registry.DataIdValueError as e:
+            # Okay to have empty results.
+            subset = set(dest_repo.query_datasets(dataset_type, explain=False, *args, **kwargs))
+        except MissingDatasetTypeError as e:
+            _log.debug("Pre-export query with args '%s' failed with %s", formatted_args, e)
+            # If dataset type never registered locally, then *any* such datasets are missing.
+        except DataIdValueError as e:
             _log.debug("Pre-export query with args '%s' failed with %s", formatted_args, e)
             # If dimensions are invalid, then *any* such datasets are missing.
         else:
@@ -1814,7 +1806,8 @@ def _filter_datasets(src_repo: Butler,
                                      level=logging.DEBUG):
         src_datasets = set()
         for dataset_type in dataset_types:
-            src_datasets |= set(src_repo.registry.queryDatasets(dataset_type, *args, **kwargs).expanded())
+            # explain=False because empty query result is ok here and we don't need it to raise an error.
+            src_datasets |= set(src_repo.query_datasets(dataset_type, explain=False, *args, **kwargs))
         # In many contexts, src_datasets is too large to print.
         _log_trace3.debug("Source datasets: %s", src_datasets)
         if calib_date:
@@ -1826,6 +1819,8 @@ def _filter_datasets(src_repo: Butler,
         ))
         _log_trace.debug("Sources filtered to %s: %s", calib_date.iso, src_datasets)
     if not src_datasets:
+        # The downstream method decides what to do with empty results.
+        # DM-40245 and DM-46178 may change this.
         raise _MissingDatasetError(
             "Source repo query with args '{}' found no matches.".format(formatted_args))
     if all_callback:
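
A note on the explain=False arguments introduced above: with the new query system, Butler.query_datasets raises when the result is empty and explain is left at its default of True, whereas _filter_datasets applies its own empty-result policy via _MissingDatasetError. A rough sketch of the difference; the repo, collection, and data ID values here are hypothetical:

from lsst.daf.butler import Butler, EmptyQueryResultError

butler = Butler("/repo/main")  # hypothetical repo
try:
    # Default explain=True: an empty result raises, carrying
    # human-readable reasons for why nothing matched.
    refs = butler.query_datasets("bias", collections="LSSTComCam/calib",
                                 instrument="LSSTComCam")
except EmptyQueryResultError as e:
    for reason in e.reasons:
        print(reason)

# explain=False: an empty result simply comes back as an empty list,
# letting the caller decide what "empty" means.
refs = butler.query_datasets("bias", collections="LSSTComCam/calib",
                             instrument="LSSTComCam", explain=False)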

tests/test_middleware_interface.py

Lines changed: 15 additions & 15 deletions
@@ -910,17 +910,17 @@ def test_filter_datasets(self):
         # Case where src is empty now covered in test_filter_datasets_nosrc.
         for src, existing in itertools.product(combinations, [set()] + combinations):
             diff = src - existing
-            src_butler = unittest.mock.Mock(
-                **{"registry.queryDatasets.return_value.expanded.return_value": src})
-            existing_butler = unittest.mock.Mock(**{"registry.queryDatasets.return_value": existing})
+            src_butler = unittest.mock.Mock(**{"query_datasets.return_value": src})
+            existing_butler = unittest.mock.Mock(**{"query_datasets.return_value": existing})

             with self.subTest(src=sorted(ref.dataId["detector"] for ref in src),
                               existing=sorted(ref.dataId["detector"] for ref in existing)):
                 result = set(_filter_datasets(src_butler, existing_butler,
                                               ["bias"], instrument="LSSTComCamSim"))
-                src_butler.registry.queryDatasets.assert_called_once_with("bias", instrument="LSSTComCamSim")
-                existing_butler.registry.queryDatasets.assert_called_once_with("bias",
-                                                                               instrument="LSSTComCamSim")
+                src_butler.query_datasets.assert_called_once_with(
+                    "bias", instrument="LSSTComCamSim", explain=False)
+                existing_butler.query_datasets.assert_called_once_with(
+                    "bias", instrument="LSSTComCamSim", explain=False)
                 self.assertEqual(result, diff)

     def test_filter_datasets_nodim(self):
@@ -933,15 +933,15 @@ def test_filter_datasets_nodim(self):
         data1 = self._make_expanded_ref(registry, "skyMap", {"skymap": skymap_name}, "dummy")

         src_butler = unittest.mock.Mock(
-            **{"registry.queryDatasets.return_value.expanded.return_value": {data1}})
+            **{"query_datasets.return_value": {data1}})
         existing_butler = unittest.mock.Mock(
-            **{"registry.queryDatasets.side_effect":
+            **{"query_datasets.side_effect":
                lsst.daf.butler.registry.DataIdValueError(
                    f"Unknown values specified for governor dimension skymap: {{{skymap_name}}}")
                })

         result = set(_filter_datasets(src_butler, existing_butler, ["skyMap"], ..., skymap="mymap"))
-        src_butler.registry.queryDatasets.assert_called_once_with("skyMap", ..., skymap="mymap")
+        src_butler.query_datasets.assert_called_once_with("skyMap", ..., skymap="mymap", explain=False)
         self.assertEqual(result, {data1})

     def test_filter_datasets_nosrc(self):
@@ -955,9 +955,9 @@ def test_filter_datasets_nosrc(self):
                                         "dummy")

         src_butler = unittest.mock.Mock(
-            **{"registry.queryDatasets.return_value.expanded.return_value": set()})
+            **{"query_datasets.return_value": set()})
         for existing in [set(), {data1}]:
-            existing_butler = unittest.mock.Mock(**{"registry.queryDatasets.return_value": existing})
+            existing_butler = unittest.mock.Mock(**{"query_datasets.return_value": existing})

             with self.subTest(existing=sorted(ref.dataId["detector"] for ref in existing)):
                 with self.assertRaises(_MissingDatasetError):
@@ -982,8 +982,8 @@ def test_function(expected, incoming):
         # Case where src is empty covered below.
         for src, existing in itertools.product(combinations, [set()] + combinations):
             src_butler = unittest.mock.Mock(
-                **{"registry.queryDatasets.return_value.expanded.return_value": src})
-            existing_butler = unittest.mock.Mock(**{"registry.queryDatasets.return_value": existing})
+                **{"query_datasets.return_value": src})
+            existing_butler = unittest.mock.Mock(**{"query_datasets.return_value": existing})

             with self.subTest(src=sorted(ref.dataId["detector"] for ref in src),
                               existing=sorted(ref.dataId["detector"] for ref in existing)):
@@ -997,8 +997,8 @@ def non_callable(_):

         for existing in [set()] + combinations:
             src_butler = unittest.mock.Mock(
-                **{"registry.queryDatasets.return_value.expanded.return_value": set()})
-            existing_butler = unittest.mock.Mock(**{"registry.queryDatasets.return_value": existing})
+                **{"query_datasets.return_value": set()})
+            existing_butler = unittest.mock.Mock(**{"query_datasets.return_value": existing})

             with self.subTest(existing=sorted(ref.dataId["detector"] for ref in existing)):
                 with self.assertRaises(_MissingDatasetError):
