Skip to content

Commit e087bd4

Browse files
committed
Merge branch 'tickets/DM-45861' into legacy
2 parents 6ee4e4f + 75223a6 commit e087bd4

File tree

4 files changed

+122
-30
lines changed

4 files changed

+122
-30
lines changed

python/lsst/ctrl/oods/cacheCleaner.py

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import os
2424
import time
2525

26+
from lsst.ctrl.oods.scanner import Scanner
2627
from lsst.ctrl.oods.timeInterval import TimeInterval
2728

2829
LOGGER = logging.getLogger(__name__)
@@ -139,14 +140,15 @@ async def getFilesOlderThan(self, seconds, directory):
139140
"""
140141
files = []
141142

142-
for dirName, subdirs, fileList in os.walk(directory):
143-
for fname in fileList:
144-
await asyncio.sleep(0)
145-
fullName = os.path.join(dirName, fname)
146-
stat_info = os.stat(fullName)
147-
modification_time = stat_info.st_mtime
148-
if modification_time < seconds:
149-
files.append(fullName)
143+
scanner = Scanner()
144+
async for entry in scanner.scan(directory):
145+
if entry.is_dir():
146+
continue
147+
full_name = entry.path
148+
stat_info = os.stat(full_name)
149+
modification_time = stat_info.st_mtime
150+
if modification_time < seconds:
151+
files.append(full_name)
150152
return files
151153

152154
async def clearEmptyDirectories(self, seconds, directories):
@@ -185,14 +187,15 @@ async def getDirectoriesOlderThan(self, seconds, directory):
185187
"""
186188
directories = []
187189

188-
for root, dirs, files in os.walk(directory, topdown=False):
189-
for name in dirs:
190-
await asyncio.sleep(0)
191-
full_name = os.path.join(root, name)
192-
stat_info = os.stat(full_name)
193-
mtime = stat_info.st_mtime
194-
if mtime < seconds:
195-
directories.append(full_name)
190+
scanner = Scanner()
191+
async for entry in scanner.scan(directory):
192+
if entry.is_file():
193+
continue
194+
full_name = entry.path
195+
stat_info = os.stat(full_name)
196+
modification_time = stat_info.st_mtime
197+
if modification_time < seconds:
198+
directories.append(full_name)
196199
return directories
197200

198201
async def removeEmptyDirectories(self, directories):
@@ -205,9 +208,15 @@ async def removeEmptyDirectories(self, directories):
205208
"""
206209
for directory in sorted(directories, reverse=True):
207210
await asyncio.sleep(0)
208-
if not os.listdir(directory):
211+
if self._isEmpty(directory):
209212
LOGGER.info("removing %s", directory)
210213
try:
211214
os.rmdir(directory)
212215
except Exception as e:
213216
LOGGER.info("Couldn't remove %s: %s", directory, e)
217+
218+
def _isEmpty(self, directory):
219+
with os.scandir(directory) as entries:
220+
for entry in entries:
221+
return False
222+
return True

python/lsst/ctrl/oods/directoryScanner.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,7 @@
1818
#
1919
# You should have received a copy of the GNU General Public License
2020
# along with this program. If not, see <https://www.gnu.org/licenses/>.
21-
import asyncio
22-
import os
21+
from lsst.ctrl.oods.scanner import Scanner
2322

2423

2524
class DirectoryScanner(object):
@@ -66,10 +65,10 @@ async def getFiles(self, directory):
6665
files: `list`
6766
list of all files in the given directory
6867
"""
68+
scanner = Scanner()
6969
files = []
70-
for dirName, subdirs, fileList in os.walk(directory):
71-
for fname in fileList:
72-
await asyncio.sleep(0)
73-
fullName = os.path.join(dirName, fname)
74-
files.append(fullName)
70+
async for entry in scanner.scan(directory):
71+
if entry.is_dir():
72+
continue
73+
files.append(entry.path)
7574
return files

python/lsst/ctrl/oods/gen3ButlerIngester.py

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,19 @@ def undef_metadata(self, filename):
144144
info["SENSOR"] = "S??"
145145
return info
146146

147-
def transmit_status(self, metadata, code, description):
147+
async def print_msg(self, msg):
148+
"""Print message dictionary - used if a CSC has not been created
149+
150+
Parameters
151+
----------
152+
msg: `dict`
153+
Dictionary to print
154+
"""
155+
156+
LOGGER.info(f"would have sent {msg=}")
157+
await asyncio.sleep(0)
158+
159+
async def transmit_status(self, metadata, code, description):
148160
"""Transmit a message with given metadata, status code and description
149161
150162
Parameters
@@ -162,10 +174,14 @@ def transmit_status(self, metadata, code, description):
162174
msg["DESCRIPTION"] = description
163175
LOGGER.info("msg: %s, code: %s, description: %s", msg, code, description)
164176
if self.csc is None:
177+
await self.print_msg(msg)
165178
return
166-
asyncio.run(self.csc.send_imageInOODS(msg))
179+
await self.csc.send_imageInOODS(msg)
167180

168181
def on_success(self, datasets):
182+
asyncio.create_task(self._on_success(datasets))
183+
184+
async def _on_success(self, datasets):
169185
"""Callback used on successful ingest. Used to transmit
170186
successful data ingestion status
171187
@@ -175,12 +191,16 @@ def on_success(self, datasets):
175191
list of DatasetRefs
176192
"""
177193
for dataset in datasets:
194+
await asyncio.sleep(0)
178195
LOGGER.info("file %s successfully ingested", dataset.path.ospath)
179196
image_data = ImageData(dataset)
180197
LOGGER.debug("image_data.get_info() = %s", image_data.get_info())
181-
self.transmit_status(image_data.get_info(), code=0, description="file ingested")
198+
await self.transmit_status(image_data.get_info(), code=0, description="file ingested")
182199

183200
def on_ingest_failure(self, exposures, exc):
201+
asyncio.create_task(self._on_ingest_failure(exposures, exc))
202+
203+
async def _on_ingest_failure(self, exposures, exc):
184204
"""Callback used on ingest failure. Used to transmit
185205
unsuccessful data ingestion status
186206
@@ -193,11 +213,12 @@ def on_ingest_failure(self, exposures, exc):
193213
194214
"""
195215
for f in exposures.files:
216+
await asyncio.sleep(0)
196217
real_file = f.filename.ospath
197218
self.move_file_to_bad_dir(real_file)
198219
cause = self.extract_cause(exc)
199220
info = self.rawexposure_info(f)
200-
self.transmit_status(info, code=1, description=f"ingest failure: {cause}")
221+
await self.transmit_status(info, code=1, description=f"ingest failure: {cause}")
201222

202223
def on_metadata_failure(self, filename, exc):
203224
"""Callback used on metadata extraction failure. Used to transmit
@@ -215,7 +236,7 @@ def on_metadata_failure(self, filename, exc):
215236

216237
cause = self.extract_cause(exc)
217238
info = self.undef_metadata(real_file)
218-
self.transmit_status(info, code=2, description=f"metadata failure: {cause}")
239+
asyncio.create_task(self.transmit_status(info, code=2, description=f"metadata failure: {cause}"))
219240

220241
def move_file_to_bad_dir(self, filename):
221242
bad_dir = self.create_bad_dirname(self.bad_file_dir, self.staging_dir, filename)
@@ -234,6 +255,7 @@ async def ingest(self, file_list):
234255
"""
235256

236257
# Ingest image.
258+
await asyncio.sleep(0)
237259
self.task.run(file_list)
238260

239261
def getName(self):
@@ -251,7 +273,7 @@ async def clean_task(self):
251273
seconds = TimeInterval.calculateTotalSeconds(self.scanInterval)
252274
while True:
253275
if self.csc:
254-
self.csc.log.info("butler repo cleaning started")
276+
self.csc.log.info("calling butler repo clean started")
255277

256278
await self.clean()
257279

@@ -264,6 +286,7 @@ async def clean(self):
264286
were ingested before the configured Interval
265287
"""
266288

289+
await asyncio.sleep(0)
267290
# calculate the time value which is Time.now - the
268291
# "olderThan" configuration
269292
t = Time.now()
@@ -273,10 +296,15 @@ async def clean(self):
273296
)
274297
t = t - td
275298

299+
LOGGER.info("about to createButler()")
276300
butler = self.createButler()
301+
await asyncio.sleep(0)
277302

303+
LOGGER.info("about to refresh()")
278304
butler.registry.refresh()
305+
await asyncio.sleep(0)
279306

307+
LOGGER.info("about to run queryDatasets")
280308
# get all datasets in these collections
281309
allCollections = self.collections if self.cleanCollections is None else self.cleanCollections
282310
all_datasets = set(
@@ -287,21 +315,29 @@ async def clean(self):
287315
bind={"ref_date": t},
288316
)
289317
)
318+
LOGGER.info("done running queryDatasets")
290319
await asyncio.sleep(0)
291320

321+
LOGGER.info("about to run queryCollections")
292322
# get all TAGGED collections
293323
tagged_cols = list(butler.registry.queryCollections(collectionTypes=CollectionType.TAGGED))
324+
LOGGER.info("done running queryCollections")
294325

295326
await asyncio.sleep(0)
296327
# Note: The code below is to get around an issue where passing
297328
# an empty list as the collections argument to queryDatasets
298329
# returns all datasets.
299330
if tagged_cols:
300331
# get all TAGGED datasets
332+
LOGGER.info("about to run queryDatasets for TAGGED collections")
301333
tagged_datasets = set(butler.registry.queryDatasets(datasetType=..., collections=tagged_cols))
334+
LOGGER.info("done running queryDatasets for TAGGED collections; differencing datasets")
335+
await asyncio.sleep(0)
302336

303337
# get a set of datasets in all_datasets, but not in tagged_datasets
304338
ref = all_datasets.difference(tagged_datasets)
339+
LOGGER.info("done differencing datasets")
340+
await asyncio.sleep(0)
305341
else:
306342
# no TAGGED collections, so use all_datasets
307343
ref = all_datasets
@@ -329,4 +365,6 @@ async def clean(self):
329365
LOGGER.info("error removing %s: %s", uri, e)
330366

331367
await asyncio.sleep(0)
368+
LOGGER.info("about to run pruneDatasets")
332369
butler.pruneDatasets(ref, purge=True, unstore=True)
370+
LOGGER.info("done running pruneDatasets")

python/lsst/ctrl/oods/scanner.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# This file is part of ctrl_oods
2+
#
3+
# Developed for the LSST Data Management System.
4+
# This product includes software developed by the LSST Project
5+
# (https://www.lsst.org).
6+
# See the COPYRIGHT file at the top-level directory of this distribution
7+
# for details of code ownership.
8+
#
9+
# This program is free software: you can redistribute it and/or modify
10+
# it under the terms of the GNU General Public License as published by
11+
# the Free Software Foundation, either version 3 of the License, or
12+
# (at your option) any later version.
13+
#
14+
# This program is distributed in the hope that it will be useful,
15+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
16+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17+
# GNU General Public License for more details.
18+
#
19+
# You should have received a copy of the GNU General Public License
20+
# along with this program. If not, see <https://www.gnu.org/licenses/>.
21+
import asyncio
22+
import os
23+
24+
25+
class Scanner(object):
26+
27+
async def scan(self, directory):
28+
"""Return entries in a directory tree
29+
30+
Parameters
31+
----------
32+
directory: `str`
33+
directory to scan
34+
35+
Returns
36+
-------
37+
entry: `DirEntry`
38+
directory entry
39+
"""
40+
await asyncio.sleep(0)
41+
for entry in os.scandir(directory):
42+
await asyncio.sleep(0)
43+
yield entry
44+
if entry.is_dir(follow_symlinks=False):
45+
async for sub_entry in self.scan(entry.path):
46+
yield sub_entry

0 commit comments

Comments
 (0)