Merge pull request #267 from allenai/favyen/20251215-speed-up-docker-build

favyen2 · web-flow · commit 9fae6c85dffb · 2025-12-16T00:14:17.000-05:00
Update olmoearth_pretrain.Dockerfile to speed up builds that use it.
diff --git a/olmoearth_pretrain.Dockerfile b/olmoearth_pretrain.Dockerfile
@@ -3,24 +3,24 @@ FROM pytorch/pytorch:2.7.0-cuda12.8-cudnn9-runtime
 RUN apt update
 RUN apt install -y libpq-dev ffmpeg libsm6 libxext6 git wget
 
-# Install rslearn and olmoearth_pretrain (need to be in local directory).
-COPY ./docker_build/rslearn /opt/rslearn
-COPY ./docker_build/olmoearth_pretrain /opt/olmoearth_pretrain
-
-# We also install terratorch so that we can use the same Docker image for TerraMind
-# experiments.
-RUN pip install --no-cache-dir git+https://github.com/IBM/terratorch.git
-RUN pip install --no-cache-dir geobench==0.0.1
-
-RUN pip install --no-cache-dir --upgrade /opt/rslearn[extra]
-RUN pip install --no-cache-dir --upgrade /opt/olmoearth_pretrain
+# Use uv to install everything.
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
 
-COPY requirements-without-rslearn.txt /opt/rslearn_projects/requirements-without-rslearn.txt
+# Install dependencies for rslearn, olmoearth_pretrain, and rslearn_projects.
+COPY docker_build/rslearn/pyproject.toml /opt/rslearn/pyproject.toml
+COPY docker_build/olmoearth_pretrain/pyproject.toml /opt/olmoearth_pretrain/pyproject.toml
+COPY requirements.txt /opt/rslearn_projects/requirements.txt
 COPY requirements-extra.txt /opt/rslearn_projects/requirements-extra.txt
-RUN pip install --no-cache-dir -r /opt/rslearn_projects/requirements-without-rslearn.txt -r /opt/rslearn_projects/requirements-extra.txt
+# Using a cache mount here avoids re-downloading dependencies in later builds when their versions have not changed.
+RUN --mount=type=cache,target=/root/.cache/uv uv pip install --system /opt/rslearn[extra] /opt/olmoearth_pretrain -r /opt/rslearn_projects/requirements.txt -r /opt/rslearn_projects/requirements-extra.txt
 
-# Copy rslearn_projects and install it too.
+# Now copy the full source code and install the actual packages.
+# If no dependencies have changed, only these steps need to be re-run, which is
+# fast and keeps the new layers small.
+COPY ./docker_build/rslearn /opt/rslearn
+COPY ./docker_build/olmoearth_pretrain /opt/olmoearth_pretrain
 COPY . /opt/rslearn_projects/
-RUN pip install --no-cache-dir /opt/rslearn_projects
+
+RUN --mount=type=cache,target=/root/.cache/uv uv pip install --system /opt/rslearn[extra] /opt/olmoearth_pretrain /opt/rslearn_projects[extra]
 
 WORKDIR /opt/rslearn_projects
diff --git a/rslp/forest_loss_driver/extract_dataset/__init__.py b/rslp/forest_loss_driver/extract_dataset/__init__.py
@@ -67,20 +67,27 @@ class InferenceLayerMaterializeArgs(MaterializePipelineArgs):
     prepare_args: PrepareArgs = field(
         default_factory=lambda: PrepareArgs(
             apply_windows_args=ApplyWindowsArgs(
-                use_initial_job=True, workers=get_default_workers()
+                use_initial_job=True,
+                workers=get_default_workers(),
             ),
+            retry_max_attempts=5,
+            retry_backoff=timedelta(seconds=5),
         )
     )
     ingest_args: IngestArgs = field(
         default_factory=lambda: IngestArgs(
             ignore_errors=True,
             apply_windows_args=ApplyWindowsArgs(workers=get_default_workers()),
+            retry_max_attempts=5,
+            retry_backoff=timedelta(seconds=5),
         )
     )
     materialize_args: MaterializeArgs = field(
         default_factory=lambda: MaterializeArgs(
             ignore_errors=True,
             apply_windows_args=ApplyWindowsArgs(workers=get_default_workers()),
+            retry_max_attempts=5,
+            retry_backoff=timedelta(seconds=5),
         ),
     )
 
@@ -104,22 +111,23 @@ class VisLayerMaterializeArgs(MaterializePipelineArgs):
             apply_windows_args=ApplyWindowsArgs(
                 use_initial_job=True, workers=DEFAULT_VIS_LAYER_WORKERS
             ),
-            retry_max_attempts=20,
-            retry_backoff=timedelta(seconds=30),
+            retry_max_attempts=5,
+            retry_backoff=timedelta(seconds=5),
         ),
     )
     ingest_args: IngestArgs = field(
         default_factory=lambda: IngestArgs(
             apply_windows_args=ApplyWindowsArgs(workers=DEFAULT_VIS_LAYER_WORKERS),
             retry_max_attempts=5,
+            retry_backoff=timedelta(seconds=5),
         )
     )
     materialize_args: MaterializeArgs = field(
         default_factory=lambda: MaterializeArgs(
             ignore_errors=True,
             apply_windows_args=ApplyWindowsArgs(workers=DEFAULT_VIS_LAYER_WORKERS),
             retry_max_attempts=5,
-            retry_backoff=timedelta(seconds=30),
+            retry_backoff=timedelta(seconds=5),
         ),
     )