Skip to content

Commit 2b2055e

Browse files
authored
Merge pull request #358 from lsst-dm/tickets/DM-53057
DM-53057: Intermittent ConcurrentModification errors on S3 reads
2 parents cc37d4e + fd08e39 commit 2b2055e

File tree

2 files changed

+34
-12
lines changed

2 files changed

+34
-12
lines changed

python/activator/middleware_interface.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1198,6 +1198,7 @@ def _prep_pipeline_graph(self, pipeline_file) -> lsst.pipe.base.PipelineGraph:
11981198
"""
11991199
return self._prep_pipeline(pipeline_file).to_graph()
12001200

1201+
@connect.retry(2, botocore.exceptions.ClientError, wait=5)
12011202
def _download(self, remote):
12021203
"""Download an image located on a remote store.
12031204

python/shared/raw.py

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,19 +37,21 @@
3737
"get_raw_path",
3838
]
3939

40-
import json
4140
import logging
4241
import os
4342
import re
4443
import time
4544
import urllib.parse
4645

46+
import botocore.exceptions
47+
import pydantic_core
4748
import requests
4849

4950
from lsst.obs.lsst import LsstCam, LsstCamImSim, LsstComCam, LsstComCamSim
5051
from lsst.obs.lsst.translators.lsst import LsstBaseTranslator
5152
from lsst.resources import ResourcePath
5253

54+
from .connect_utils import retry
5355
from .visit import FannedOutVisit
5456

5557
_log = logging.getLogger("lsst." + __name__)
@@ -387,19 +389,38 @@ def get_group_id_from_oid(oid: str) -> str:
387389
oid.removesuffix(m["extension"])
388390
+ ".json"
389391
)
390-
# Wait a bit but not too long for the file.
391-
# It should normally show up before the image.
392-
count = 0
393-
while not sidecar.exists():
394-
count += 1
395-
if count > 20:
396-
raise RuntimeError(f"Unable to retrieve JSON sidecar: {sidecar}")
397-
time.sleep(0.1)
392+
group_id = _get_group_id_from_sidecar(sidecar)
393+
return group_id
394+
395+
396+
@retry(4, botocore.exceptions.ClientError, wait=5)
397+
def _get_group_id_from_sidecar(sidecar):
398+
"""Read the group id from a sidecar JSON file.
398399
399-
with sidecar.open("r") as f:
400-
md = json.load(f)
400+
The sidecar file normally show up before the image. If not present, wait a
401+
bit but not too long for the file. Sometimes, the object store gives other
402+
transient ClientErrors, which are retried in a longer timescale.
403+
404+
Parameters
405+
----------
406+
sidecar : `lsst.resources.ResourcePath`
407+
URI to a sidecar JSON file.
401408
402-
return md.get("GROUPID", "")
409+
Returns
410+
-------
411+
group_id : `str`
412+
The group identifier as a string.
413+
"""
414+
count = 0
415+
while count <= 20:
416+
try:
417+
md = pydantic_core.from_json(sidecar.read())
418+
return md.get("GROUPID", "")
419+
# If no such sidecar exists, a FileNotFoundError is raised.
420+
except FileNotFoundError:
421+
count += 1
422+
time.sleep(0.1)
423+
raise RuntimeError(f"Unable to retrieve JSON sidecar: {sidecar}")
403424

404425

405426
def get_raw_path(instrument, detector, group, snap, exposure_id, physical_filter):

0 commit comments

Comments
 (0)