[release test] move azure related functions to cloud_util.py #57675
ray_release/cloud_util.py (new file, +102 lines)

```python
import random
import string
from typing import Optional, Tuple
import time
import os
import shutil
from urllib.parse import urlparse

from azure.storage.blob import BlobServiceClient
from azure.identity import DefaultAzureCredential

from ray_release.logger import logger


def generate_tmp_cloud_storage_path() -> str:
    return "".join(random.choice(string.ascii_lowercase) for i in range(10))


def upload_file_to_azure(
    local_file_path: str,
    azure_file_path: str,
    blob_service_client: Optional[BlobServiceClient] = None,
) -> None:
    """Upload a file to Azure Blob Storage.

    Args:
        local_file_path: Path to local file to upload.
        azure_file_path: Path to file in Azure blob storage.
    """

    account, container, path = _parse_abfss_uri(azure_file_path)
    account_url = f"https://{account}.blob.core.windows.net"
    if blob_service_client is None:
        credential = DefaultAzureCredential(exclude_managed_identity_credential=True)
        blob_service_client = BlobServiceClient(account_url, credential)

    blob_client = blob_service_client.get_blob_client(container=container, blob=path)
    try:
        with open(local_file_path, "rb") as f:
            blob_client.upload_blob(data=f, overwrite=True)
    except Exception as e:
        logger.exception(f"Failed to upload file to Azure Blob Storage: {e}")
        raise


def archive_directory(directory_path: str) -> str:
    timestamp = str(int(time.time()))
    archived_filename = f"ray_release_{timestamp}.zip"
    output_path = os.path.abspath(archived_filename)
    shutil.make_archive(output_path[:-4], "zip", directory_path)
    return output_path


def upload_working_dir_to_azure(working_dir: str, azure_directory_uri: str) -> str:
    """Upload archived working directory to Azure blob storage.

    Args:
        working_dir: Path to directory to upload.
        azure_directory_uri: Path to directory in Azure blob storage.
    Returns:
        Azure blob storage path where archived directory was uploaded.
    """
    archived_file_path = archive_directory(working_dir)
    archived_filename = os.path.basename(archived_file_path)
    azure_file_path = f"{azure_directory_uri}/{archived_filename}"
    upload_file_to_azure(
        local_file_path=archived_file_path, azure_file_path=azure_file_path
    )
    return azure_file_path


def _parse_abfss_uri(uri: str) -> Tuple[str, str, str]:
    """Parse ABFSS URI to extract account, container, and path.
    ABFSS URI format: abfss://container@account.dfs.core.windows.net/path
    Returns: (account_name, container_name, path)
    """
    parsed = urlparse(uri)
    if "@" not in parsed.netloc:
        raise ValueError(
            f"Invalid ABFSS URI format: {uri}. "
            "Expected format: abfss://container@account.dfs.core.windows.net/path"
        )

    # Split netloc into container@account.dfs.core.windows.net
    container, account_part = parsed.netloc.split("@", 1)

    # Extract account name from account.dfs.core.windows.net
    account = account_part.split(".")[0]

    # parsed.path starts with "/", which we strip to get the blob path
    path = parsed.path.lstrip("/")

    return account, container, path


def convert_abfss_uri_to_https(uri: str) -> str:
    """Convert ABFSS URI to HTTPS URI.
    ABFSS URI format: abfss://container@account.dfs.core.windows.net/path
    Returns: HTTPS URI format: https://account.dfs.core.windows.net/container/path
    """
    account, container, path = _parse_abfss_uri(uri)
    return f"https://{account}.dfs.core.windows.net/{container}/{path}"
```
Comment on lines +96 to +102:

The `convert_abfss_uri_to_https` function returns a URL on the `dfs.core.windows.net` endpoint, while `upload_file_to_azure` in this same file writes through the `blob.core.windows.net` endpoint. The suggestion switches the conversion to the Blob endpoint:

```python
def convert_abfss_uri_to_https(uri: str) -> str:
    """Convert ABFSS URI to HTTPS URI.
    ABFSS URI format: abfss://container@account.dfs.core.windows.net/path
    Returns: HTTPS URI format: https://account.blob.core.windows.net/container/path
    """
    account, container, path = _parse_abfss_uri(uri)
    return f"https://{account}.blob.core.windows.net/{container}/{path}"
```
ray_release/util.py

```diff
@@ -2,20 +2,15 @@
 import hashlib
 import json
 import os
-import random
-import string
 import subprocess
 import time
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
 
-from azure.storage.blob import BlobServiceClient
-from azure.identity import DefaultAzureCredential
 from google.cloud import storage
 import requests
-import shutil
-from urllib.parse import urlparse
 
 from ray_release.logger import logger
+from ray_release.cloud_util import archive_directory
 from ray_release.configs.global_config import get_global_config
 
 if TYPE_CHECKING:
@@ -196,10 +191,6 @@ def python_version_str(python_version: Tuple[int, int]) -> str:
     return "".join([str(x) for x in python_version])
 
 
-def generate_tmp_cloud_storage_path() -> str:
-    return "".join(random.choice(string.ascii_lowercase) for i in range(10))
-
-
 def join_cloud_storage_paths(*paths: str):
     paths = list(paths)
     if len(paths) > 1:
@@ -222,7 +213,7 @@ def upload_working_dir_to_gcs(working_dir: str) -> str:
     """
     # Create archive of working dir
     logger.info(f"Archiving working directory: {working_dir}")
-    archived_file_path = _archive_directory(working_dir)
+    archived_file_path = archive_directory(working_dir)
     archived_filename = os.path.basename(archived_file_path)
 
     # Upload to GCS
@@ -232,89 +223,3 @@ def upload_working_dir_to_gcs(working_dir: str) -> str:
     blob.upload_from_filename(archived_filename)
 
     return f"gs://ray-release-working-dir/{blob.name}"
-
-
-def upload_file_to_azure(
-    local_file_path: str,
-    azure_file_path: str,
-    blob_service_client: Optional[BlobServiceClient] = None,
-) -> None:
-    """Upload a file to Azure Blob Storage.
-
-    Args:
-        local_file_path: Path to local file to upload.
-        azure_file_path: Path to file in Azure blob storage.
-    """
-
-    account, container, path = _parse_abfss_uri(azure_file_path)
-    account_url = f"https://{account}.blob.core.windows.net"
-    if blob_service_client is None:
-        credential = DefaultAzureCredential(exclude_managed_identity_credential=True)
-        blob_service_client = BlobServiceClient(account_url, credential)
-
-    blob_client = blob_service_client.get_blob_client(container=container, blob=path)
-    try:
-        with open(local_file_path, "rb") as f:
-            blob_client.upload_blob(data=f, overwrite=True)
-    except Exception as e:
-        logger.exception(f"Failed to upload file to Azure Blob Storage: {e}")
-        raise
-
-
-def _archive_directory(directory_path: str) -> str:
-    timestamp = str(int(time.time()))
-    archived_filename = f"ray_release_{timestamp}.zip"
-    output_path = os.path.abspath(archived_filename)
-    shutil.make_archive(output_path[:-4], "zip", directory_path)
-    return output_path
-
-
-def upload_working_dir_to_azure(working_dir: str, azure_directory_uri: str) -> str:
-    """Upload archived working directory to Azure blob storage.
-
-    Args:
-        working_dir: Path to directory to upload.
-        azure_directory_uri: Path to directory in Azure blob storage.
-    Returns:
-        Azure blob storage path where archived directory was uploaded.
-    """
-    archived_file_path = _archive_directory(working_dir)
-    archived_filename = os.path.basename(archived_file_path)
-    azure_file_path = f"{azure_directory_uri}/{archived_filename}"
-    upload_file_to_azure(
-        local_file_path=archived_file_path, azure_file_path=azure_file_path
-    )
-    return azure_file_path
-
-
-def _parse_abfss_uri(uri: str) -> Tuple[str, str, str]:
-    """Parse ABFSS URI to extract account, container, and path.
-    ABFSS URI format: abfss://container@account.dfs.core.windows.net/path
-    Returns: (account_name, container_name, path)
-    """
-    parsed = urlparse(uri)
-    if "@" not in parsed.netloc:
-        raise ValueError(
-            f"Invalid ABFSS URI format: {uri}. "
-            "Expected format: abfss://container@account.dfs.core.windows.net/path"
-        )
-
-    # Split netloc into container@account.dfs.core.windows.net
-    container, account_part = parsed.netloc.split("@", 1)
-
-    # Extract account name from account.dfs.core.windows.net
-    account = account_part.split(".")[0]
-
-    # Path starts with / which we keep for the blob path
-    path = parsed.path.lstrip("/")
-
-    return account, container, path
-
-
-def convert_abfss_uri_to_https(uri: str) -> str:
-    """Convert ABFSS URI to HTTPS URI.
-    ABFSS URI format: abfss://container@account.dfs.core.windows.net/path
-    Returns: HTTPS URI format: https://account.dfs.core.windows.net/container/path
-    """
-    account, container, path = _parse_abfss_uri(uri)
-    return f"https://{account}.dfs.core.windows.net/{container}/{path}"
```
The `upload_working_dir_to_azure` function calls `archive_directory` to create a temporary zip file, but this file is not deleted after being uploaded to Azure. This will leave temporary files in the execution environment. It's good practice to ensure temporary resources are cleaned up. You can use a `try...finally` block to ensure the file is removed even if the upload fails.
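A minimal sketch of what that cleanup could look like (the reviewer's suggestion applied, not code from the PR; the `os.path.exists` guard is an extra assumption):

```python
def upload_working_dir_to_azure(working_dir: str, azure_directory_uri: str) -> str:
    archived_file_path = archive_directory(working_dir)
    archived_filename = os.path.basename(archived_file_path)
    azure_file_path = f"{azure_directory_uri}/{archived_filename}"
    try:
        upload_file_to_azure(
            local_file_path=archived_file_path, azure_file_path=azure_file_path
        )
    finally:
        # Delete the local archive whether or not the upload succeeded.
        if os.path.exists(archived_file_path):
            os.remove(archived_file_path)
    return azure_file_path
```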