From d2738e70e5ccf3ef8115d05a016058c907bece31 Mon Sep 17 00:00:00 2001 From: Brian Kroth Date: Tue, 15 Oct 2024 15:57:56 -0500 Subject: [PATCH] Simplify devcontainer (#864) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Pull Request ## Title Combines container build steps to reduce devcontainer size substantially. --- ## Description Reduces container size from ~5.7GB to ~2.7GB by combining the following steps: - install conda (previously in a base image layer) - create the base mlos environment This reduces the space substantially because, with both environments created in the same image layer, conda's usual attempt to hardlink packages across environments is actually able to work instead of duplicating files across layers. The downside is that changes to base-package-level requirements require reinstalling all of conda again (i.e., the single large combined container layer is less cacheable). Should help with issues like: https://github.com/Microsoft-CISL/sqlite-autotuning/issues/7 --- ## Type of Change - 🔄 Refactor - Dev environment --- ## Testing - `time make devcontainer` - "Rebuild devcontainer" inside VSCode - `docker image ls` to check the sizes --- --------- Co-authored-by: Sergiy Matusevych --- .devcontainer/Dockerfile | 115 +++++++++++++++++++++------------------ 1 file changed, 61 insertions(+), 54 deletions(-) diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 8702d314cbc..99b21ede545 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -FROM mcr.microsoft.com/devcontainers/miniconda:3 AS base +FROM mcr.microsoft.com/vscode/devcontainers/base AS base # Add some additional packages for the devcontainer terminal environment. 
USER root @@ -9,19 +9,35 @@ RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ && apt-get -y install --no-install-recommends \ bash bash-completion \ less colordiff \ - curl jq \ + curl gpg ca-certificates \ + jq \ ripgrep \ vim-nox neovim python3-pynvim \ make \ rename \ + sudo \ && apt-get clean && rm -rf /var/lib/apt/lists/* \ && echo "C-w: unix-filename-rubout" >> /etc/inputrc # Also tweak C-w to stop at slashes as well instead of just spaces +# Prepare the mlos_deps.yml file in a cross platform way. +FROM mcr.microsoft.com/vscode/devcontainers/base AS deps-prep +RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ + && apt-get -y install --no-install-recommends \ + python3-minimal python3-setuptools +COPY --chown=vscode . /tmp/conda-tmp/ +RUN /tmp/conda-tmp/prep-deps-files.sh \ + && ls -l /tmp/conda-tmp/ # && cat /tmp/conda-tmp/combined.requirements.txt /tmp/conda-tmp/mlos_deps.yml + +FROM base AS conda + # Set some cache dirs to be owned by the vscode user even as we're currently # executing as root to build the container image. # NOTE: We do *not* mark these as volumes - it doesn't help rebuilding at all. +RUN addgroup conda \ + && adduser vscode conda + ARG PIP_CACHE_DIR=/var/cache/pip ENV PIP_CACHE_DIR=/var/cache/pip RUN mkdir -p ${PIP_CACHE_DIR} \ @@ -36,66 +52,57 @@ RUN mkdir -p ${CONDA_PKGS_DIRS} \ USER vscode:conda -# Upgrade conda and use strict priorities -# Use the mamba solver (necessary for some quality of life speedups due to -# required packages to support Windows) -RUN umask 0002 \ +# Try and prime the devcontainer's ssh known_hosts keys with the github one for scripted calls. 
+RUN mkdir -p /home/vscode/.ssh \ + && ( \ + grep -q ^github.com /home/vscode/.ssh/known_hosts \ + || ssh-keyscan github.com | tee -a /home/vscode/.ssh/known_hosts \ + ) + +COPY --from=deps-prep --chown=vscode:conda /tmp/conda-tmp/mlos_deps.yml /tmp/conda-tmp/combined.requirements.txt /tmp/conda-tmp/ + +# Combine the installation of miniconda and the mlos dependencies into a single step in order to save space. +# This allows the mlos env to reference the base env's packages without duplication across layers. +RUN echo "Setup miniconda" \ + && curl -Ss https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/conda.gpg > /dev/null \ + && gpg --keyring /etc/apt/trusted.gpg.d/conda.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 \ + && echo "deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/conda.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" | sudo tee /etc/apt/sources.list.d/conda.list \ + && sudo apt-get update \ + && sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends conda \ + && sudo apt-get clean && sudo rm -rf /var/lib/apt/lists/* \ + && echo "# Adjust the conda installation to be user/group writable." \ + && sudo /opt/conda/bin/conda init --system \ + && sudo chgrp -R conda /opt/conda \ + && sudo chmod -R g+wX /opt/conda \ + && find /opt/conda -type d -print0 | xargs -0 sudo chmod -c g+s \ + && umask 0002 \ + && echo "# Use conda-forge first to get the latest versions of packages " \ + && echo "# and reduce duplication with mlos env (which also uses conda-forge first)." 
\ + && echo "# Upgrade conda and use strict priorities" \ + && echo "# Use the mamba solver (necessary for some quality of life speedups due to required packages to support Windows)" \ + && /opt/conda/bin/conda init \ && /opt/conda/bin/conda config --set channel_priority strict \ && /opt/conda/bin/conda info \ - && /opt/conda/bin/conda update -v -y -n base -c defaults --all \ + && /opt/conda/bin/conda update -v -y -n base -c conda-forge -c defaults --all \ && /opt/conda/bin/conda list -n base \ - && /opt/conda/bin/conda install -v -y -n base conda-libmamba-solver \ - && /opt/conda/bin/conda config --set solver libmamba \ + && /opt/conda/bin/conda install -v -y -n base -c conda-forge -c defaults conda-libmamba-solver \ + && /opt/conda/bin/conda config --system --set solver libmamba \ + && echo "# Install some additional editor packages for the base environment." \ + && /opt/conda/bin/conda run -n base pip install --no-cache-dir -U pynvim \ + && echo "# Clean up conda cache to save some space." \ && /opt/conda/bin/conda list -n base \ && /opt/conda/bin/conda clean -v -y -a \ - && /opt/conda/bin/conda run -n base pip cache purge - -# No longer relevant since we're using conda-forge in the environment files by default now. -## Update the base. This helps save space by making sure the same version -## python is used for both the base env and mlos env. -#RUN umask 0002 \ -# && /opt/conda/bin/conda update -v -y -n base -c defaults --all \ -# && /opt/conda/bin/conda update -v -y -n base -c defaults conda python \ -# && /opt/conda/bin/conda clean -v -y -a \ -# && /opt/conda/bin/conda run -n base pip cache purge - -# Install some additional editor packages for the base environment. -RUN umask 0002 \ - && /opt/conda/bin/conda run -n base pip install --no-cache-dir -U pynvim - -# Setup (part of) the mlos environment in the devcontainer. -# NOTEs: -# - The mlos_deps.yml file is prepared by the prep-container-build script(s). 
-# - The rest happens during first container start once the source is available. -# See Also: updateContentCommand in .devcontainer/devcontainer.json -RUN mkdir -p /opt/conda/pkgs/cache/ && chown -R vscode:conda /opt/conda/pkgs/cache/ -RUN /opt/conda/bin/conda init bash \ - && /opt/conda/bin/conda config --set solver libmamba - -# Prepare the mlos_deps.yml file in a cross platform way. -FROM mcr.microsoft.com/devcontainers/miniconda:3 AS deps-prep -COPY --chown=vscode:conda . /tmp/conda-tmp/ -RUN /tmp/conda-tmp/prep-deps-files.sh \ - && ls -l /tmp/conda-tmp/ # && cat /tmp/conda-tmp/combined.requirements.txt /tmp/conda-tmp/mlos_deps.yml - -# Install some additional dependencies for the mlos environment. -# Make sure they have conda group ownership to make the devcontainer more -# reliable useable across vscode uid changes. -FROM base AS devcontainer -USER vscode -COPY --from=deps-prep --chown=vscode:conda /tmp/conda-tmp/mlos_deps.yml /tmp/conda-tmp/combined.requirements.txt /tmp/conda-tmp/ -RUN umask 0002 \ + && /opt/conda/bin/conda run -n base pip cache purge \ + && echo "# Install some additional dependencies for the mlos environment." \ + && echo "# Make sure they have conda group ownership to make the devcontainer more" \ + && echo "# reliable useable across vscode uid changes." 
\ && sg conda -c "/opt/conda/bin/conda env create -n mlos -v -f /tmp/conda-tmp/mlos_deps.yml" \ && sg conda -c "/opt/conda/bin/conda run -n mlos pip install --no-cache-dir -U -r /tmp/conda-tmp/combined.requirements.txt" \ && sg conda -c "/opt/conda/bin/conda run -n mlos pip cache purge" \ && sg conda -c "/opt/conda/bin/conda clean -v -y -a" \ - && mkdir -p /opt/conda/pkgs/cache/ && chown -R vscode:conda /opt/conda/pkgs/cache/ -RUN mkdir -p /home/vscode/.conda/envs \ + && mkdir -p /opt/conda/pkgs/cache/ && chown -R vscode:conda /opt/conda/pkgs/cache/ \ + && mkdir -p /home/vscode/.conda/envs \ && ln -s /opt/conda/envs/mlos /home/vscode/.conda/envs/mlos -# Try and prime the devcontainer's ssh known_hosts keys with the github one for scripted calls. -RUN mkdir -p /home/vscode/.ssh \ - && ( \ - grep -q ^github.com /home/vscode/.ssh/known_hosts \ - || ssh-keyscan github.com | tee -a /home/vscode/.ssh/known_hosts \ - ) +#ENV PATH=/opt/conda/bin:$PATH +ENV PATH=/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin