|
| 1 | +# Copyright (c) 2004-present, Facebook, Inc. |
| 2 | +# All rights reserved. |
| 3 | +# |
| 4 | +# This source code is licensed under the BSD-style license found in the |
| 5 | +# LICENSE file in the root directory of this source tree. An additional grant |
| 6 | +# of patent rights can be found in the PATENTS file in the same directory. |
| 7 | + |
| 8 | +"""Artifact storage with file caching for FBOSS image builder.""" |
| 9 | + |
| 10 | +import hashlib |
| 11 | +import logging |
| 12 | +import shutil |
| 13 | +import tempfile |
| 14 | +from collections.abc import Callable |
| 15 | +from pathlib import Path |
| 16 | + |
| 17 | +from distro_cli.lib.paths import get_abs_path |
| 18 | + |
| 19 | +from .exceptions import ArtifactError |
| 20 | + |
| 21 | +logger = logging.getLogger(__name__) |
| 22 | + |
| 23 | + |
def get_artifact_store_dir() -> Path:
    """Return the default artifact store directory.

    Resolves the store location inside the git checkout when possible.
    When no git repository is available (e.g. a CMake build that copies
    the code outside the repo), degrades to a shared directory under the
    system temp root, creating it on demand.

    Returns:
        Path to the default artifact store directory.
    """
    try:
        return get_abs_path("fboss-image/distro_cli/.artifacts")
    except RuntimeError:
        # No git repository to anchor the path; fall back to a temp location.
        fallback = Path(tempfile.gettempdir()) / "fboss-distro-cli-artifacts"
        fallback.mkdir(parents=True, exist_ok=True)
        logger.warning(
            f"Git repository not found, using temp directory for artifacts: {fallback}"
        )
        return fallback
| 42 | + |
| 43 | + |
class ArtifactStore:
    """Artifact storage with external cache evaluation.

    get() delegates cache evaluation and related fetching to a caller-provided function.
    store() persists data and metadata files separately in storage subdirectories.
    """

    # Store root, resolved lazily. Tests may assign this class attribute
    # before constructing instances to redirect storage.
    ARTIFACT_STORE_DIR: Path | None = None

    def __init__(self):
        """Initialize artifact store, creating the store directory if needed."""
        override = self.ARTIFACT_STORE_DIR
        self.store_dir = get_artifact_store_dir() if override is None else override
        self.store_dir.mkdir(parents=True, exist_ok=True)
        logger.debug(f"Artifact store initialized at: {self.store_dir}")

    def get(
        self,
        store_key: str,
        fetch_fn: Callable[
            [list[Path], list[Path]], tuple[bool, list[Path], list[Path]]
        ],
    ) -> tuple[list[Path], list[Path]]:
        """Retrieve artifact files using a caller-provided fetch function.

        Args:
            store_key: Unique identifier for the artifact
            fetch_fn: Evaluates stored files and returns (store_hit, data_files, metadata_files)

        Returns:
            Tuple of (data_files, metadata_files)
        """
        subdir = self._get_store_subdir(store_key)
        existing_data = self._get_stored_files_in_dir(subdir / "data")
        existing_metadata = self._get_stored_files_in_dir(subdir / "metadata")

        logger.info(f"Executing fetch function for: {store_key}")
        hit, fresh_data, fresh_metadata = fetch_fn(existing_data, existing_metadata)

        if not hit:
            # Miss: persist whatever the fetch function produced.
            logger.info(f"Store miss: {store_key}, storing new files")
            return self.store(store_key, fresh_data, fresh_metadata)

        logger.info(f"Store hit: {store_key}")
        return (existing_data, existing_metadata)

    def _get_store_subdir(self, store_key: str) -> Path:
        """Return the storage subdirectory for *store_key*.

        The directory name is the full SHA256 hex digest of the key, which
        keeps arbitrary keys filesystem-safe.
        """
        digest = hashlib.sha256(store_key.encode()).hexdigest()
        return self.store_dir / digest

    def _get_stored_files_in_dir(self, dir_path: Path) -> list[Path]:
        """List regular files directly inside *dir_path* (empty if absent)."""
        if not dir_path.exists():
            return []
        return [entry for entry in dir_path.iterdir() if entry.is_file()]

    def store(
        self, store_key: str, data_files: list[Path], metadata_files: list[Path]
    ) -> tuple[list[Path], list[Path]]:
        """Store data and metadata files in the storage.

        Files/directories are moved to store_subdir/data/ and store_subdir/metadata/.
        If a path is a file, it's moved directly.
        If a path is a directory, all its contents are moved.

        Args:
            store_key: Store key for the artifact
            data_files: List of data file/directory paths to store
            metadata_files: List of metadata file/directory paths to store

        Returns:
            Tuple of (stored_data_files, stored_metadata_files)
        """
        subdir = self._get_store_subdir(store_key)
        data_dir = subdir / "data"
        metadata_dir = subdir / "metadata"

        def _replace_contents(sources: list[Path], target: Path, label: str) -> None:
            # Replace any previously stored files for this key so old and new
            # artifacts (e.g. uncompressed + compressed variants) never mix;
            # callers always see the set from the latest operation.
            if not sources:
                return
            if target.exists():
                shutil.rmtree(target)
            target.mkdir(parents=True, exist_ok=True)
            for src in sources:
                self._move_to_dir(src, target)
            logger.info(f"Stored {len(sources)} {label} file(s): {store_key}")

        _replace_contents(data_files, data_dir, "data")
        _replace_contents(metadata_files, metadata_dir, "metadata")

        # Re-list the directories so the caller gets the post-move paths.
        return (
            self._get_stored_files_in_dir(data_dir),
            self._get_stored_files_in_dir(metadata_dir),
        )

    def _move_to_dir(self, source: Path, dest_dir: Path) -> None:
        """Move a file or directory into *dest_dir*.

        Uses move instead of copy for better performance when source and dest
        are on the same filesystem. A pre-existing directory at the target
        name is removed first so the move cannot nest into it.

        Args:
            source: Source file or directory
            dest_dir: Destination directory
        """
        target = dest_dir / source.name
        if source.is_dir() and target.exists():
            shutil.rmtree(target)
        shutil.move(str(source), str(target))

    def invalidate(self, store_key: str) -> None:
        """Remove an artifact from the store.

        Args:
            store_key: Store key for the artifact to remove
        """
        target = self._get_store_subdir(store_key)
        if target.exists():
            shutil.rmtree(target)
            logger.info(f"Invalidated store entry: {store_key}")

    def clear(self) -> None:
        """Clear all stored artifacts, leaving an empty store directory."""
        if self.store_dir.exists():
            shutil.rmtree(self.store_dir)
        self.store_dir.mkdir(parents=True, exist_ok=True)
        logger.info("All stored artifacts cleared")

    @classmethod
    def create_temp_dir(cls, prefix: str = "temp-") -> Path:
        """Create a temporary directory within the artifact store.

        Creates temp directory on same filesystem as artifact store to enable
        fast atomic moves and avoid filling up /tmp. Useful for parallel builds
        that need isolation.

        This is a class method so it can be called without an instance.

        Args:
            prefix: Prefix for the temporary directory name

        Returns:
            Path to the created temporary directory
        """
        root = cls.ARTIFACT_STORE_DIR
        if root is None:
            root = get_artifact_store_dir()
        tmp_root = root / ".tmp"
        tmp_root.mkdir(parents=True, exist_ok=True)
        return Path(tempfile.mkdtemp(dir=tmp_root, prefix=prefix))

    @staticmethod
    def delete_temp_dir(temp_dir: Path) -> None:
        """Delete a temporary directory created by create_temp_dir().

        This is a static method so it can be called without an instance.

        Args:
            temp_dir: Path to the temporary directory to delete
        """
        if not temp_dir.exists():
            return
        shutil.rmtree(temp_dir, ignore_errors=True)
        logger.debug(f"Deleted temporary directory: {temp_dir}")
| 249 | + |
| 250 | + |
def find_artifact_in_dir(
    output_dir: Path, pattern: str, component_name: str = "Component"
) -> Path:
    """Find a single artifact matching a glob pattern in a directory.

    Supports both uncompressed (.tar) and zstd-compressed (.tar.zst) variants.

    Args:
        output_dir: Directory to search in
        pattern: Glob pattern to match (e.g., "kernel-*.rpms.tar")
        component_name: Name of component for error messages

    Returns:
        Path to the found artifact

    Raises:
        ArtifactError: If no artifacts found

    Note:
        If multiple artifacts match, returns the most recent one based on modification time.
    """
    # Gather matches for the plain pattern plus its .zst sibling.
    candidates = [
        *output_dir.glob(pattern),
        *output_dir.glob(f"{pattern}.zst"),
    ]

    if not candidates:
        raise ArtifactError(
            f"{component_name} build output not found in: {output_dir} "
            f"(patterns: {pattern}, {pattern}.zst)"
        )

    newest = candidates[0]
    if len(candidates) > 1:
        # Ambiguous match: prefer the most recently modified artifact.
        candidates.sort(key=lambda p: p.stat().st_mtime, reverse=True)
        newest = candidates[0]
        logger.warning(
            f"Multiple artifacts found matching '{pattern}' or '{pattern}.zst', "
            f"using most recent: {newest}"
        )

    logger.info(f"Found {component_name} artifact: {newest}")
    return newest
0 commit comments