@@ -42,10 +42,9 @@
 import lsst.ctrl.mpexec
 from lsst.ctrl.mpexec import SeparablePipelineExecutor, SingleQuantumExecutor, MPGraphExecutor
 from lsst.daf.butler import Butler, CollectionType, DatasetType, Timespan
-from lsst.daf.butler.registry import MissingDatasetTypeError
+from lsst.daf.butler import DataIdValueError, MissingDatasetTypeError
 import lsst.dax.apdb
 import lsst.geom
-from lsst.meas.algorithms.htmIndexer import HtmIndexer
 import lsst.obs.base
 import lsst.pipe.base
 import lsst.analysis.tools
@@ -624,21 +623,16 @@ def _export_refcats(self, region):
         refcats : iterable [`DatasetRef`]
             The refcats to be exported, after any filtering.
         """
-        center = lsst.geom.SpherePoint(region.getCentroid())
-        radius = max([center.separation(lsst.geom.SpherePoint(vertex)) for vertex in region.getVertices()])
-        indexer = HtmIndexer(depth=7)
-        shard_ids, _ = indexer.getShardIds(center, radius)
-        htm_where = f"htm7 in ({','.join(str(x) for x in shard_ids)})"
         # Get shards from all refcats that overlap this region.
         possible_refcats = _get_refcat_types(self.central_butler)
-        _log.debug("Searching for refcats of types %s in %s...",
-                   {t.name for t in possible_refcats}, shard_ids)
+        _log.debug("Searching for refcats of types %s.", {t.name for t in possible_refcats})
         refcats = set(_filter_datasets(
             self.central_butler, self.butler,
             possible_refcats,
             collections=self.instrument.makeRefCatCollectionName(),
-            where=htm_where,
-            findFirst=True,
+            where="htm7.region OVERLAPS search_region",
+            bind={"search_region": region},
+            find_first=True,
             all_callback=self._mark_dataset_usage,
         ))
         if refcats:
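
The rewritten query leans on the Butler registry's spatial join: binding an `lsst.sphgeom.Region` and asking for `htm7.region OVERLAPS search_region` makes the database find every level-7 HTM shard touching the region, so the client-side HtmIndexer math deleted above becomes unnecessary. A minimal standalone sketch of the idiom (repo path, refcat name, and collection are hypothetical):

    import lsst.sphgeom
    from lsst.daf.butler import Butler

    butler = Butler("/repo/main")  # hypothetical repo
    # Any lsst.sphgeom.Region works; here, a 1-degree circle.
    center = lsst.sphgeom.UnitVector3d(lsst.sphgeom.LonLat.fromDegrees(150.0, 2.0))
    region = lsst.sphgeom.Circle(center, lsst.sphgeom.Angle.fromDegrees(1.0))
    refs = butler.query_datasets(
        "gaia_dr3_20230707",            # hypothetical refcat dataset type
        collections="refcats",          # hypothetical collection
        where="htm7.region OVERLAPS search_region",
        bind={"search_region": region},
        find_first=True,
        explain=False,                  # return [] instead of raising when nothing matches
    )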
@@ -668,16 +662,11 @@ def _export_skymap_and_templates(self, region, filter):
             ["skyMap"],
             skymap=self.skymap_name,
             collections=self._collection_skymap,
-            findFirst=True,
+            find_first=True,
             all_callback=self._mark_dataset_usage,
         ))
         _log.debug("Found %d new skymap datasets.", len(skymaps))

-        # htm7 is too coarse and many more patches than necessary would be selected.
-        # But searching Butler with htm higher level does not work.
-        # TODO: This will be replaced by the new spatial query feature in Butler.
-        template_where = " OR ".join([f"htm7 in ({range[0]}..{range[1]})"
-                                      for range in lsst.sphgeom.HtmPixelization(7).interior(region).ranges()])
         try:
             _log.debug("Searching for templates.")
             templates = set(_filter_datasets(
@@ -687,8 +676,9 @@ def _export_skymap_and_templates(self, region, filter):
                 instrument=self.instrument.getName(),
                 skymap=self.skymap_name,
                 physical_filter=filter,
-                where=template_where,
-                findFirst=True,
+                where="patch.region OVERLAPS search_region",
+                bind={"search_region": region},
+                find_first=True,
                 all_callback=self._mark_dataset_usage,
             ))
         except _MissingDatasetError as err:
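
The same OVERLAPS idiom applied to `patch.region` is what the deleted TODO was waiting for: it selects only patches whose own regions intersect the search region, rather than every patch inside a coarse htm7 envelope. Continuing the sketch above (template type, collection, and data-ID values are hypothetical):

    templates = butler.query_datasets(
        "goodSeeingDiff_templateExp",   # hypothetical template dataset type
        collections="templates/deep",   # hypothetical collection
        where="patch.region OVERLAPS search_region",
        bind={"search_region": region},
        instrument="LSSTCam",           # data-ID kwargs constrain the query further
        skymap="lsst_cells_v1",
        physical_filter="r_03",
        find_first=True,
        explain=False,
    )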
@@ -719,8 +709,6 @@ def _export_calibs(self, detector_id, filter):
         # Some calibs have an exposure ID (of the source dataset?), but these can't be used in AP.
         type_names = {t.name for t in self.central_butler.registry.queryDatasetTypes()
                       if t.isCalibration() and "exposure" not in t.dimensions}
-        # TODO: we can't use findFirst=True yet because findFirst query
-        # in CALIBRATION-type collection is not supported currently.
         # For now, filter down to the dataset types that exist in the specific calib collection.
         # TODO: A new query API after DM-45873 may replace or improve this usage.
         # TODO: DM-40245 to identify the datasets.
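
For context, the retained filter above can be reproduced interactively: `Registry.queryDatasetTypes` (still a registry-level call here) lists all dataset types, `DatasetType.isCalibration()` flags those usable in CALIBRATION collections, and membership tests on `dimensions` work with dimension names:

    # Calibration dataset types whose data IDs carry no exposure dimension.
    calib_types = {t.name for t in butler.registry.queryDatasetTypes()
                   if t.isCalibration() and "exposure" not in t.dimensions}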
@@ -734,6 +722,7 @@ def _export_calibs(self, detector_id, filter):
             instrument=self.instrument.getName(),
             detector=detector_id,
             physical_filter=filter,
+            find_first=True,
             calib_date=calib_date,
             all_callback=self._mark_dataset_usage,
         ))
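
Adding `find_first=True` here relies on the new query system resolving find-first searches in CALIBRATION collections, which the deleted TODO said the old registry path could not do. A hedged sketch of a direct calib lookup under that assumption (type and collection names hypothetical):

    bias_refs = butler.query_datasets(
        "bias",                         # hypothetical calib dataset type
        collections="LSSTCam/calib",    # hypothetical CALIBRATION collection
        instrument="LSSTCam",
        detector=94,
        find_first=True,
        explain=False,
    )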
@@ -761,7 +750,7 @@ def _export_ml_models(self):
                 self.central_butler, self.butler,
                 ["pretrainedModelPackage"],
                 collections=self._collection_ml_model,
-                findFirst=True,
+                find_first=True,
                 all_callback=self._mark_dataset_usage,
             ))
         except _MissingDatasetError as err:
@@ -1750,8 +1739,7 @@ def _filter_datasets(src_repo: Butler,
     """Identify datasets in a source repository, filtering out those already
     present in a destination.

-    Unlike Butler or database queries, this method raises if nothing in the
-    source repository matches the query criteria.
+    This method raises if nothing in the source repository matches the query criteria.

     Parameters
     ----------
@@ -1770,7 +1758,7 @@ def _filter_datasets(src_repo: Butler,
        This callable is not called if the query returns no results.
    *args, **kwargs
        Parameters for describing the dataset query. They have the same
-        meanings as the parameters of `lsst.daf.butler.Registry.queryDatasets`.
+        meanings as the parameters of `lsst.daf.butler.Butler.query_datasets`.
        The query must be valid for both ``src_repo`` and ``dest_repo``.

    Returns
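
The corrected docstring reference points at the Butler-level method, and the migration it implies is mostly mechanical: `Registry.queryDatasets` is camelCase and returns a lazy result object, while `Butler.query_datasets` is snake_case, returns a `list[DatasetRef]`, and by default raises `EmptyQueryResultError` on an empty result unless `explain=False` is passed. Side by side (collection name hypothetical):

    # Old registry API: lazy result iterable, camelCase keyword
    # (collections=... means "search all collections").
    refs = butler.registry.queryDatasets("skyMap", collections=..., findFirst=True)

    # New Butler API: list of DatasetRef, snake_case keyword; explain controls
    # whether an empty result raises EmptyQueryResultError.
    refs = butler.query_datasets("skyMap", collections="skymaps",
                                 find_first=True, explain=False)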
@@ -1798,8 +1786,12 @@ def _filter_datasets(src_repo: Butler,
     known_datasets = set()
     for dataset_type in dataset_types:
         try:
-            subset = set(dest_repo.registry.queryDatasets(dataset_type, *args, **kwargs))
-        except lsst.daf.butler.registry.DataIdValueError as e:
+            # Okay to have empty results.
+            subset = set(dest_repo.query_datasets(dataset_type, explain=False, *args, **kwargs))
+        except MissingDatasetTypeError as e:
+            _log.debug("Pre-export query with args '%s' failed with %s", formatted_args, e)
+            # If dataset type never registered locally, then *any* such datasets are missing.
+        except DataIdValueError as e:
             _log.debug("Pre-export query with args '%s' failed with %s", formatted_args, e)
             # If dimensions are invalid, then *any* such datasets are missing.
         else:
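
The two new except clauses distinguish failure modes that both mean "nothing of this type can exist here": `MissingDatasetTypeError` fires when the destination repo has never registered the dataset type at all, while `DataIdValueError` fires when a data-ID value in the query (say, an unknown instrument or skymap) does not exist there. A sketch of the same pattern as a hypothetical helper:

    from lsst.daf.butler import Butler, DataIdValueError, MissingDatasetTypeError

    def count_known(butler: Butler, dataset_type: str, **query) -> int:
        """Hypothetical helper: count matching datasets, treating an
        unknown type or data-ID value as zero matches."""
        try:
            return len(butler.query_datasets(dataset_type, explain=False, **query))
        except (MissingDatasetTypeError, DataIdValueError):
            # Unregistered type or invalid data-ID value: nothing can match.
            return 0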
@@ -1814,7 +1806,8 @@ def _filter_datasets(src_repo: Butler,
                           level=logging.DEBUG):
         src_datasets = set()
         for dataset_type in dataset_types:
-            src_datasets |= set(src_repo.registry.queryDatasets(dataset_type, *args, **kwargs).expanded())
+            # explain=False because an empty query result is ok here; we don't need it to raise an error.
+            src_datasets |= set(src_repo.query_datasets(dataset_type, explain=False, *args, **kwargs))
         # In many contexts, src_datasets is too large to print.
         _log_trace3.debug("Source datasets: %s", src_datasets)
         if calib_date:
@@ -1826,6 +1819,8 @@ def _filter_datasets(src_repo: Butler,
             ))
         _log_trace.debug("Sources filtered to %s: %s", calib_date.iso, src_datasets)
     if not src_datasets:
+        # The downstream method decides what to do with empty results.
+        # DM-40245 and DM-46178 may change this.
         raise _MissingDatasetError(
             "Source repo query with args '{}' found no matches.".format(formatted_args))
     if all_callback:
0 commit comments