From f8b08a446c218f8a3b8fa0f481dc5aab5371bcc5 Mon Sep 17 00:00:00 2001 From: Gabriel Luiz Freitas Almeida Date: Mon, 3 Feb 2025 12:56:47 -0300 Subject: [PATCH] refactor: Enhance profile picture copying with optimized I/O and error handling (#6083) * feat: enhance profile picture copying with optimized I/O and error handling * fix: correct docstring for copy_profile_pictures function --- .../base/langflow/initial_setup/setup.py | 60 +++++++++++++++---- 1 file changed, 49 insertions(+), 11 deletions(-) diff --git a/src/backend/base/langflow/initial_setup/setup.py b/src/backend/base/langflow/initial_setup/setup.py index 481f3169d6f1..4a6d136f0102 100644 --- a/src/backend/base/langflow/initial_setup/setup.py +++ b/src/backend/base/langflow/initial_setup/setup.py @@ -35,12 +35,7 @@ get_default_folder_id, ) from langflow.services.database.models.user.crud import get_user_by_username -from langflow.services.deps import ( - get_settings_service, - get_storage_service, - get_variable_service, - session_scope, -) +from langflow.services.deps import get_settings_service, get_storage_service, get_variable_service, session_scope from langflow.template.field.prompt import DEFAULT_PROMPT_INTUT_TYPES from langflow.utils.util import escape_json_dump @@ -393,10 +388,27 @@ async def load_starter_projects(retries=3, delay=1) -> list[tuple[anyio.Path, di async def copy_profile_pictures() -> None: + """Asynchronously copies profile pictures from the source directory to the target configuration directory. + + This function copies profile pictures while optimizing I/O operations by: + 1. Using a set to track existing files and avoid redundant filesystem checks + 2. Performing bulk copy operations concurrently using asyncio.gather + 3. Offloading blocking I/O to threads + + The directory structure is: + profile_pictures/ + ├── People/ + │ └── [profile images] + └── Space/ + └── [profile images] + """ + # Get config directory from settings config_dir = get_storage_service().settings_service.settings.config_dir if config_dir is None: msg = "Config dir is not set in the settings" raise ValueError(msg) + + # Setup source and target paths origin = anyio.Path(__file__).parent / "profile_pictures" target = anyio.Path(config_dir) / "profile_pictures" @@ -404,15 +416,41 @@ async def copy_profile_pictures() -> None: msg = f"The source folder '{origin}' does not exist." raise ValueError(msg) + # Create target dir if needed if not await target.exists(): - await target.mkdir(parents=True) + await target.mkdir(parents=True, exist_ok=True) try: - await asyncio.to_thread(shutil.copytree, str(origin), str(target), dirs_exist_ok=True) - logger.debug(f"Folder copied from '{origin}' to '{target}'") + # Get set of existing files in target to avoid redundant checks + target_files = {str(f.relative_to(target)) async for f in target.rglob("*") if await f.is_file()} + + # Define a helper coroutine to copy a single file concurrently + async def copy_file(src_file, dst_file, rel_path): + # Create parent directories if needed + await dst_file.parent.mkdir(parents=True, exist_ok=True) + # Offload blocking I/O to a thread + await asyncio.to_thread(shutil.copy2, str(src_file), str(dst_file)) + logger.debug(f"Copied file '{rel_path}'") + + tasks = [] + async for src_file in origin.rglob("*"): + if not await src_file.is_file(): + continue + + rel_path = src_file.relative_to(origin) + if str(rel_path) not in target_files: + dst_file = target / rel_path + tasks.append(copy_file(src_file, dst_file, rel_path)) + else: + logger.debug(f"Skipped existing file: '{rel_path}'") + + if tasks: + await asyncio.gather(*tasks) - except Exception: # noqa: BLE001 - logger.exception("Error copying the folder") + except Exception as exc: + logger.exception("Error copying profile pictures") + msg = "An error occurred while copying profile pictures." + raise RuntimeError(msg) from exc def get_project_data(project):