|
| 1 | +# Copyright (c) 2004-present, Facebook, Inc. |
| 2 | +# All rights reserved. |
| 3 | +# |
| 4 | +# This source code is licensed under the BSD-style license found in the |
| 5 | +# LICENSE file in the root directory of this source tree. An additional grant |
| 6 | +# of patent rights can be found in the PATENTS file in the same directory. |
| 7 | + |
| 8 | +"""Docker image management utilities.""" |
| 9 | + |
| 10 | +import hashlib |
| 11 | +import json |
| 12 | +import logging |
| 13 | +import os |
| 14 | +import subprocess |
| 15 | +import time |
| 16 | +from pathlib import Path |
| 17 | + |
| 18 | +from distro_cli.lib.constants import FBOSS_BUILDER_IMAGE |
| 19 | + |
| 20 | +logger = logging.getLogger(__name__) |
| 21 | + |
| 22 | +# Default cache expiration time in seconds (24 hours) |
| 23 | +DEFAULT_CACHE_EXPIRATION_SECONDS = 24 * 60 * 60 |
| 24 | + |
| 25 | + |
def get_git_dir() -> Path:
    """Locate the repository root by walking up from this file.

    Starting at this module's resolved location, each parent directory is
    inspected for a ``.git`` entry until the filesystem root is reached.

    Returns:
        Path to the repository root directory.

    Raises:
        RuntimeError: If no ancestor directory contains a ``.git`` entry.
    """
    candidate = Path(__file__).resolve()
    # At the filesystem root, a path is its own parent — that ends the walk.
    while candidate.parent != candidate:
        if (candidate / ".git").exists():
            return candidate
        candidate = candidate.parent
    raise RuntimeError("Could not find repository root (no .git directory found)")
| 41 | + |
| 42 | + |
| 43 | +def _hash_directory_tree( |
| 44 | + directory: Path, exclude_patterns: list[str] | None = None |
| 45 | +) -> str: |
| 46 | + """Hash all files in a directory tree. |
| 47 | +
|
| 48 | + Args: |
| 49 | + directory: Directory to hash |
| 50 | + exclude_patterns: List of patterns to exclude (e.g., '__pycache__', '.pyc') |
| 51 | +
|
| 52 | + Returns: |
| 53 | + SHA256 hexdigest of all files in the directory |
| 54 | + """ |
| 55 | + if exclude_patterns is None: |
| 56 | + exclude_patterns = [] |
| 57 | + |
| 58 | + hasher = hashlib.sha256() |
| 59 | + |
| 60 | + # Get all files, sorted for deterministic ordering |
| 61 | + for file_path in sorted(directory.rglob("*")): |
| 62 | + if not file_path.is_file(): |
| 63 | + continue |
| 64 | + |
| 65 | + # Skip excluded patterns |
| 66 | + skip = False |
| 67 | + for pattern in exclude_patterns: |
| 68 | + if pattern in str(file_path): |
| 69 | + skip = True |
| 70 | + break |
| 71 | + if skip: |
| 72 | + continue |
| 73 | + |
| 74 | + # Hash the relative path (for structure changes) |
| 75 | + rel_path = file_path.relative_to(directory) |
| 76 | + hasher.update(str(rel_path).encode()) |
| 77 | + |
| 78 | + # Hash the file content |
| 79 | + hasher.update(file_path.read_bytes()) |
| 80 | + |
| 81 | + return hasher.hexdigest() |
| 82 | + |
| 83 | + |
def _compute_dependency_checksum(root_dir: Path) -> str:
    """Compute a checksum of the Dockerfile and everything it depends on.

    Inputs folded into the checksum:
      - fboss/oss/docker/Dockerfile
      - the entire build/ tree (manifests, getdeps.py, Python modules, etc.),
        excluding Python bytecode and cache files

    Args:
        root_dir: Root directory of the repository.

    Returns:
        SHA256 hexdigest of all dependency files.

    Raises:
        RuntimeError: If the Dockerfile or the build/ directory is missing.
    """
    digest = hashlib.sha256()

    dockerfile_path = root_dir / "fboss" / "oss" / "docker" / "Dockerfile"
    if not dockerfile_path.exists():
        raise RuntimeError(f"Dockerfile not found: {dockerfile_path}")
    digest.update(dockerfile_path.read_bytes())

    build_tree = root_dir / "build"
    if not build_tree.exists():
        raise RuntimeError(f"build/ directory not found: {build_tree}")

    # Ephemeral bytecode/cache files must not perturb the checksum.
    tree_digest = _hash_directory_tree(build_tree, ["__pycache__", ".pyc", ".pyo"])
    digest.update(tree_digest.encode())

    return digest.hexdigest()
| 116 | + |
| 117 | + |
| 118 | +def _get_image_build_timestamp(image_tag: str) -> int | None: |
| 119 | + """Get the build timestamp from a Docker image label. |
| 120 | +
|
| 121 | + Args: |
| 122 | + image_tag: Full image tag (e.g., "fboss_builder:abc123") |
| 123 | +
|
| 124 | + Returns: |
| 125 | + Unix timestamp when image was built, or None if not found |
| 126 | + """ |
| 127 | + try: |
| 128 | + result = subprocess.run( |
| 129 | + [ |
| 130 | + "docker", |
| 131 | + "image", |
| 132 | + "inspect", |
| 133 | + image_tag, |
| 134 | + "--format", |
| 135 | + "{{json .Config.Labels}}", |
| 136 | + ], |
| 137 | + capture_output=True, |
| 138 | + text=True, |
| 139 | + check=False, |
| 140 | + ) |
| 141 | + if result.returncode != 0: |
| 142 | + return None |
| 143 | + |
| 144 | + labels = json.loads(result.stdout.strip()) |
| 145 | + if not labels: |
| 146 | + return None |
| 147 | + |
| 148 | + timestamp_str = labels.get("build_timestamp") |
| 149 | + if not timestamp_str: |
| 150 | + return None |
| 151 | + |
| 152 | + return int(timestamp_str) |
| 153 | + except (json.JSONDecodeError, ValueError, FileNotFoundError): |
| 154 | + return None |
| 155 | + |
| 156 | + |
def _should_build_image(root_dir: Path) -> tuple[bool, str, str]:
    """Decide whether the fboss_builder image needs rebuilding.

    Rebuild triggers: no locally cached image tagged with the current
    dependency checksum, or a cached image older than the configured
    expiration window (FBOSS_BUILDER_CACHE_EXPIRATION_HOURS, default 24).

    Args:
        root_dir: Repository root directory.

    Returns:
        Tuple of (should_build, checksum, reason).
    """
    # Expiration window is configurable via environment, in hours.
    expiration_hours = int(os.getenv("FBOSS_BUILDER_CACHE_EXPIRATION_HOURS", "24"))
    max_age_seconds = expiration_hours * 3600

    logger.debug("Computing checksum of Dockerfile dependencies...")
    checksum = _compute_dependency_checksum(root_dir)
    tagged_image = f"{FBOSS_BUILDER_IMAGE}:{checksum}"
    logger.debug(f"Dockerfile checksum: {checksum[:12]}")

    built_at = _get_image_build_timestamp(tagged_image)
    if built_at is None:
        # No cached image for this checksum.
        return (True, checksum, "not found")

    age_seconds = int(time.time()) - built_at
    if age_seconds >= max_age_seconds:
        return (True, checksum, f"expired (>{expiration_hours}h old)")

    return (False, checksum, "exists and is not expired")
| 191 | + |
| 192 | + |
def build_fboss_builder_image() -> None:
    """Build the fboss_builder Docker image if needed.

    Uses a two-tier caching strategy:
      1. Local cache: skip the build when an image tagged with the current
         dependency checksum already exists locally.
      2. Build: run the build script, then re-tag the result with the
         checksum plus a build_timestamp label.

    Time-based expiration: even if the checksum matches, the image is
    rebuilt once it is older than the expiration time (default: 24 hours,
    configurable via FBOSS_BUILDER_CACHE_EXPIRATION_HOURS environment
    variable).

    Raises:
        RuntimeError: If the Dockerfile or build script is not found, or
            the build fails.
    """
    # Find paths.
    root_dir = get_git_dir()
    dockerfile = root_dir / "fboss" / "oss" / "docker" / "Dockerfile"
    build_script = root_dir / "fboss" / "oss" / "scripts" / "build_docker.sh"

    if not dockerfile.exists():
        raise RuntimeError(f"Dockerfile not found: {dockerfile}")
    if not build_script.exists():
        raise RuntimeError(f"Build script not found: {build_script}")

    # Consult the local cache (checksum tag + time-based expiration).
    should_build, checksum, reason = _should_build_image(root_dir)
    if not should_build:
        logger.info(
            f"{FBOSS_BUILDER_IMAGE} image with checksum {checksum[:12]} "
            f"{reason}, skipping build"
        )
        return

    logger.info(f"Building {FBOSS_BUILDER_IMAGE} image...")
    checksum_tag = f"{FBOSS_BUILDER_IMAGE}:{checksum}"

    try:
        subprocess.run(
            [str(build_script)],
            check=True,
            cwd=str(root_dir),
            env={
                **os.environ,
                "DOCKER_BUILDKIT": "1",
                "BUILDKIT_PROGRESS": "plain",
            },
        )
        logger.info(f"Successfully built {FBOSS_BUILDER_IMAGE} image")

        # Stamp the timestamp AFTER the (potentially long) build finishes,
        # so cache expiration measures the image's age rather than when the
        # build started.
        current_timestamp = int(time.time())

        # Tag :latest as the checksum tag while attaching the
        # build_timestamp label that _get_image_build_timestamp reads.
        # A plain `docker tag` cannot add labels (and would be immediately
        # overwritten by this step anyway), so a trivial FROM-only build
        # performs both the labeling and the tagging in one call.
        subprocess.run(
            [
                "docker",
                "build",
                "--label",
                f"build_timestamp={current_timestamp}",
                "--tag",
                checksum_tag,
                "-",
            ],
            input=f"FROM {FBOSS_BUILDER_IMAGE}:latest\n",
            text=True,
            check=True,
            capture_output=True,
        )
        logger.info(f"Tagged image with checksum: {checksum[:12]}")

    except subprocess.CalledProcessError as e:
        # For the captured tagging step, stderr would otherwise be lost from
        # the CalledProcessError message; surface it for debuggability.
        detail = f": {e.stderr.strip()}" if getattr(e, "stderr", None) else ""
        raise RuntimeError(
            f"Failed to build {FBOSS_BUILDER_IMAGE} image: {e}{detail}"
        ) from e
0 commit comments