Skip to content

Commit e44b62b

Browse files
[Nexthop] Add artifact handling for FBOSS Image Builder
Add artifact storage and caching capabilities for build outputs. - Implement artifact store for managing build artifacts - Add artifact caching and retrieval functionality - Include unit tests for artifact operations - Update test helpers to support artifact testing Added unit tests for artifact storage and caching.
1 parent c6f20ae commit e44b62b

File tree

3 files changed

+662
-4
lines changed

3 files changed

+662
-4
lines changed
Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
# Copyright (c) 2004-present, Facebook, Inc.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree. An additional grant
6+
# of patent rights can be found in the PATENTS file in the same directory.
7+
8+
"""Artifact storage with file caching for FBOSS image builder."""
9+
10+
import hashlib
11+
import logging
12+
import shutil
13+
import tempfile
14+
from collections.abc import Callable
15+
from pathlib import Path
16+
17+
from distro_cli.lib.paths import get_abs_path
18+
19+
from .exceptions import ArtifactError
20+
21+
logger = logging.getLogger(__name__)
22+
23+
24+
def get_artifact_store_dir() -> Path:
    """Resolve the directory used as the default artifact store.

    The preferred location is ``fboss-image/distro_cli/.artifacts`` as
    resolved by ``get_abs_path``. When that lookup raises ``RuntimeError``,
    a fixed directory under the system temp directory is created (if needed)
    and used instead.

    Returns:
        Path to the default artifact store directory, or to the temp-directory
        fallback when the repository-relative path cannot be resolved.
    """
    try:
        return get_abs_path("fboss-image/distro_cli/.artifacts")
    except RuntimeError:
        # Reached when the code runs outside a git repository (e.g., a CMake
        # build copies the sources elsewhere); fall back to a temp location.
        fallback_dir = Path(tempfile.gettempdir(), "fboss-distro-cli-artifacts")
        fallback_dir.mkdir(parents=True, exist_ok=True)
        logger.warning(
            f"Git repository not found, using temp directory for artifacts: {fallback_dir}"
        )
        return fallback_dir
42+
43+
44+
class ArtifactStore:
45+
"""Artifact storage with external cache evaluation.
46+
47+
get() delegates cache evaluation and related fetching to a caller-provided function.
48+
store() persists data and metadata files separately in storage subdirectories.
49+
"""
50+
51+
# Artifact store directory - class attribute (lazy initialization)
52+
# Can be overridden by tests before creating ArtifactStore instances
53+
ARTIFACT_STORE_DIR: Path | None = None
54+
55+
def __init__(self):
56+
"""Initialize artifact store."""
57+
# Use class attribute if set, otherwise compute default
58+
if self.ARTIFACT_STORE_DIR is None:
59+
self.store_dir = get_artifact_store_dir()
60+
else:
61+
self.store_dir = self.ARTIFACT_STORE_DIR
62+
self.store_dir.mkdir(parents=True, exist_ok=True)
63+
logger.debug(f"Artifact store initialized at: {self.store_dir}")
64+
65+
def get(
66+
self,
67+
store_key: str,
68+
fetch_fn: Callable[
69+
[list[Path], list[Path]], tuple[bool, list[Path], list[Path]]
70+
],
71+
) -> tuple[list[Path], list[Path]]:
72+
"""Retrieve artifact files using caller-provided fetch function.
73+
74+
Args:
75+
store_key: Unique identifier for the artifact
76+
fetch_fn: Evaluates stored files and returns (store_hit, data_files, metadata_files)
77+
78+
Returns:
79+
Tuple of (data_files, metadata_files)
80+
"""
81+
store_subdir = self._get_store_subdir(store_key)
82+
stored_data_files = self._get_stored_files_in_dir(store_subdir / "data")
83+
stored_metadata_files = self._get_stored_files_in_dir(store_subdir / "metadata")
84+
85+
logger.info(f"Executing fetch function for: {store_key}")
86+
store_hit, new_data_files, new_metadata_files = fetch_fn(
87+
stored_data_files, stored_metadata_files
88+
)
89+
90+
if store_hit:
91+
logger.info(f"Store hit: {store_key}")
92+
return (stored_data_files, stored_metadata_files)
93+
94+
logger.info(f"Store miss: {store_key}, storing new files")
95+
return self.store(store_key, new_data_files, new_metadata_files)
96+
97+
def _get_store_subdir(self, store_key: str) -> Path:
98+
"""Get the storage subdirectory for a given store key.
99+
100+
Args:
101+
store_key: Store key for the artifact
102+
103+
Returns:
104+
Path to the storage subdirectory
105+
"""
106+
# Use full SHA256 hash to create a directory name
107+
key_hash = hashlib.sha256(store_key.encode()).hexdigest()
108+
return self.store_dir / key_hash
109+
110+
def _get_stored_files_in_dir(self, dir_path: Path) -> list[Path]:
111+
"""Get files from a directory.
112+
113+
Args:
114+
dir_path: Directory path
115+
116+
Returns:
117+
List of file paths
118+
"""
119+
if not dir_path.exists():
120+
return []
121+
return [f for f in dir_path.iterdir() if f.is_file()]
122+
123+
def store(
124+
self, store_key: str, data_files: list[Path], metadata_files: list[Path]
125+
) -> tuple[list[Path], list[Path]]:
126+
"""Store data and metadata files in the storage.
127+
128+
Files/directories are moved to store_subdir/data/ and store_subdir/metadata/.
129+
If a path is a file, it's moved directly.
130+
If a path is a directory, all its contents are moved.
131+
132+
Args:
133+
store_key: Store key for the artifact
134+
data_files: List of data file/directory paths to store
135+
metadata_files: List of metadata file/directory paths to store
136+
137+
Returns:
138+
Tuple of (stored_data_files, stored_metadata_files)
139+
"""
140+
store_subdir = self._get_store_subdir(store_key)
141+
data_dir = store_subdir / "data"
142+
metadata_dir = store_subdir / "metadata"
143+
144+
# Store data files
145+
if data_files:
146+
# Replace any previously stored data for this key so we don't mix
147+
# old and new artifacts (e.g., uncompressed + compressed variants).
148+
if data_dir.exists():
149+
shutil.rmtree(data_dir)
150+
data_dir.mkdir(parents=True, exist_ok=True)
151+
for file_path in data_files:
152+
self._move_to_dir(file_path, data_dir)
153+
logger.info(f"Stored {len(data_files)} data file(s): {store_key}")
154+
155+
# Store metadata files
156+
if metadata_files:
157+
# Likewise, keep metadata for this key in a clean directory so callers
158+
# always see the current set from the latest operation.
159+
if metadata_dir.exists():
160+
shutil.rmtree(metadata_dir)
161+
metadata_dir.mkdir(parents=True, exist_ok=True)
162+
for file_path in metadata_files:
163+
self._move_to_dir(file_path, metadata_dir)
164+
logger.info(f"Stored {len(metadata_files)} metadata file(s): {store_key}")
165+
166+
# Return all stored files (after updating the directories)
167+
return (
168+
self._get_stored_files_in_dir(data_dir),
169+
self._get_stored_files_in_dir(metadata_dir),
170+
)
171+
172+
def _move_to_dir(self, source: Path, dest_dir: Path) -> None:
173+
"""Move a file or directory contents to destination directory.
174+
175+
Uses move instead of copy for better performance when source and dest
176+
are on the same filesystem.
177+
178+
Args:
179+
source: Source file or directory
180+
dest_dir: Destination directory
181+
"""
182+
dest_path = dest_dir / source.name
183+
if source.is_dir():
184+
# For directories, move the entire tree
185+
if dest_path.exists():
186+
shutil.rmtree(dest_path)
187+
shutil.move(str(source), str(dest_path))
188+
else:
189+
# For files, move directly
190+
shutil.move(str(source), str(dest_path))
191+
192+
def invalidate(self, store_key: str) -> None:
193+
"""Remove an artifact from the store.
194+
195+
Args:
196+
store_key: Store key for the artifact to remove
197+
"""
198+
store_subdir = self._get_store_subdir(store_key)
199+
if store_subdir.exists():
200+
shutil.rmtree(store_subdir)
201+
logger.info(f"Invalidated store entry: {store_key}")
202+
203+
def clear(self) -> None:
204+
"""Clear all stored artifacts."""
205+
if self.store_dir.exists():
206+
shutil.rmtree(self.store_dir)
207+
self.store_dir.mkdir(parents=True, exist_ok=True)
208+
logger.info("All stored artifacts cleared")
209+
210+
@classmethod
211+
def create_temp_dir(cls, prefix: str = "temp-") -> Path:
212+
"""Create a temporary directory within the artifact store.
213+
214+
Creates temp directory on same filesystem as artifact store to enable
215+
fast atomic moves and avoid filling up /tmp. Useful for parallel builds
216+
that need isolation.
217+
218+
This is a class method so it can be called without an instance.
219+
220+
Args:
221+
prefix: Prefix for the temporary directory name
222+
223+
Returns:
224+
Path to the created temporary directory
225+
"""
226+
# Get artifact store directory
227+
store_dir = (
228+
get_artifact_store_dir()
229+
if cls.ARTIFACT_STORE_DIR is None
230+
else cls.ARTIFACT_STORE_DIR
231+
)
232+
233+
temp_base = store_dir / ".tmp"
234+
temp_base.mkdir(parents=True, exist_ok=True)
235+
return Path(tempfile.mkdtemp(dir=temp_base, prefix=prefix))
236+
237+
@staticmethod
238+
def delete_temp_dir(temp_dir: Path) -> None:
239+
"""Delete a temporary directory created by create_temp_dir().
240+
241+
This is a static method so it can be called without an instance.
242+
243+
Args:
244+
temp_dir: Path to the temporary directory to delete
245+
"""
246+
if temp_dir.exists():
247+
shutil.rmtree(temp_dir, ignore_errors=True)
248+
logger.debug(f"Deleted temporary directory: {temp_dir}")
249+
250+
251+
def find_artifact_in_dir(
    output_dir: Path, pattern: str, component_name: str = "Component"
) -> Path:
    """Find a single artifact matching a glob pattern in a directory.

    Supports both uncompressed (.tar) and zstd-compressed (.tar.zst) variants
    by searching for ``pattern`` and for ``pattern`` with ``.zst`` appended.

    Args:
        output_dir: Directory to search in
        pattern: Glob pattern to match (e.g., "kernel-*.rpms.tar")
        component_name: Name of component for error messages

    Returns:
        Path to the found artifact

    Raises:
        ArtifactError: If no artifacts found

    Note:
        If multiple artifacts match, returns the most recent one based on modification time.
    """
    # Find both uncompressed and compressed versions. A pattern that itself
    # matches ".zst" names (e.g. "image-*") yields the same path from both
    # globs; dict.fromkeys drops those repeats while keeping discovery order,
    # so a single file is not reported as "multiple artifacts".
    artifacts = list(
        dict.fromkeys([*output_dir.glob(pattern), *output_dir.glob(f"{pattern}.zst")])
    )

    if not artifacts:
        raise ArtifactError(
            f"{component_name} build output not found in: {output_dir} "
            f"(patterns: {pattern}, {pattern}.zst)"
        )

    # If multiple artifacts found, use the most recent one based on modification time
    if len(artifacts) > 1:
        artifacts.sort(key=lambda p: p.stat().st_mtime, reverse=True)
        logger.warning(
            f"Multiple artifacts found matching '{pattern}' or '{pattern}.zst', "
            f"using most recent: {artifacts[0]}"
        )

    logger.info(f"Found {component_name} artifact: {artifacts[0]}")
    return artifacts[0]

0 commit comments

Comments
 (0)