Commit 083dc6b

refactor: merge main
2 parents aa4d390 + 759f996 commit 083dc6b

25 files changed: +5228 -168 lines changed

CHANGELOG.md

Lines changed: 21 additions & 0 deletions
@@ -1,5 +1,26 @@
 # Changelog

+## [0.11.0](https://github.com/runpod/tetra-rp/compare/v0.10.0...v0.11.0) (2025-08-19)
+
+
+### Features
+
+* Add download acceleration for dependencies and HuggingFace models ([#83](https://github.com/runpod/tetra-rp/issues/83)) ([e47c9e3](https://github.com/runpod/tetra-rp/commit/e47c9e37030ead1831893dd70a1322421befbaad))
+
+## [0.10.0](https://github.com/runpod/tetra-rp/compare/v0.9.0...v0.10.0) (2025-08-07)
+
+
+### Features
+
+* Add idempotent network volume deployment ([#79](https://github.com/runpod/tetra-rp/issues/79)) ([289d333](https://github.com/runpod/tetra-rp/commit/289d333aaaf48e00bfdad2a5f6356bdfc6bcf286))
+
+## [0.9.0](https://github.com/runpod/tetra-rp/compare/v0.8.0...v0.9.0) (2025-08-04)
+
+
+### Features
+
+* AE-961 Add class serialization caching for remote execution ([#76](https://github.com/runpod/tetra-rp/issues/76)) ([95f9eed](https://github.com/runpod/tetra-rp/commit/95f9eed1810e6a623091348c326e2ea571c6dddf))
+
 ## [0.8.0](https://github.com/runpod/tetra-rp/compare/v0.7.0...v0.8.0) (2025-07-22)


Makefile

Lines changed: 4 additions & 0 deletions
@@ -15,6 +15,10 @@ help: # Show this help menu
 dev: # Install development dependencies
 	uv sync --all-groups

+update:
+	uv sync --upgrade --all-groups
+	uv lock --upgrade
+
 proto: # TODO: auto-generate proto files
 	@echo "TODO"

README.md

Lines changed: 1 addition & 1 deletion
@@ -785,6 +785,6 @@ def fetch_data(url):
 This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

 <p align="center">
-  <a href="https://github.com/yourusername/tetra">Tetra</a> •
+  <a href="https://github.com/runpod/tetra-rp">Tetra</a> •
   <a href="https://runpod.io">Runpod</a>
 </p>

pyproject.toml

Lines changed: 2 additions & 1 deletion
@@ -1,6 +1,6 @@
 [project]
 name = "tetra_rp"
-version = "0.8.0"
+version = "0.11.0"
 description = "A Python library for distributed inference and serving of machine learning models"
 authors = [
     { name = "Marut Pandya", email = "[email protected]" },
@@ -23,6 +23,7 @@ dependencies = [
     "python-dotenv>=1.0.0",
     "aiohttp>=3.9.0",
     "pydantic>=2.11.4",
+
 ]

 [dependency-groups]

src/tetra_rp/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -14,6 +14,7 @@
     CpuServerlessEndpoint,
     CpuInstanceType,
     CudaVersion,
+    DataCenter,
     GpuGroup,
     LiveServerless,
     PodTemplate,
@@ -30,6 +31,7 @@
     "CpuServerlessEndpoint",
     "CpuInstanceType",
     "CudaVersion",
+    "DataCenter",
     "GpuGroup",
     "LiveServerless",
     "PodTemplate",

src/tetra_rp/client.py

Lines changed: 20 additions & 2 deletions
@@ -16,6 +16,8 @@ def remote(
     dependencies: Optional[List[str]] = None,
     system_dependencies: Optional[List[str]] = None,
     type: Optional[str] = None,
+    accelerate_downloads: bool = True,
+    hf_models_to_cache: Optional[List[str]] = None,
     **extra,
 ):
     """
@@ -24,12 +26,19 @@ def remote(
     This decorator allows a function to be executed in a remote serverless environment, with support for
     dynamic resource provisioning and installation of required dependencies.

+    Args:
         resource_config (ServerlessResource): Configuration object specifying the serverless resource
             to be provisioned or used.
         dependencies (List[str], optional): A list of pip package names to be installed in the remote
             environment before executing the function. Defaults to None.
         system_dependencies (List[str], optional): A list of system packages to install. Defaults to None.
         type (str, optional): Execution type. Use "LB" for DeploymentRuntime (Load Balancer mode).
+        system_dependencies (List[str], optional): A list of system packages to be installed in the remote
+            environment before executing the function. Defaults to None.
+        accelerate_downloads (bool, optional): Enable download acceleration for dependencies and models.
+            Defaults to True.
+        hf_models_to_cache (List[str], optional): List of HuggingFace model IDs to pre-cache using
+            download acceleration. Defaults to None.
         extra (dict, optional): Additional parameters for the execution of the resource. Defaults to an empty dict.

     Returns:
@@ -42,7 +51,8 @@ def remote(
         @remote(
             resource_config=my_resource_config,
             dependencies=["numpy", "pandas"],
-            sync=True # Optional, to run synchronously
+            accelerate_downloads=True,
+            hf_models_to_cache=["gpt2", "bert-base-uncased"]
         )
         async def my_function(data):
             # Function logic here
@@ -83,6 +93,8 @@ def decorator(func_or_class):
                 resource_config,
                 dependencies,
                 system_dependencies,
+                accelerate_downloads,
+                hf_models_to_cache,
                 extra,
             )
         else:
@@ -101,7 +113,13 @@ async def wrapper(*args, **kwargs):

             stub = stub_resource(remote_resource, **extra)
             return await stub(
-                func_or_class, dependencies, system_dependencies, *args, **kwargs
+                func_or_class,
+                dependencies,
+                system_dependencies,
+                accelerate_downloads,
+                hf_models_to_cache,
+                *args,
+                **kwargs,
             )

         return wrapper

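For orientation, here is a minimal usage sketch of the two new decorator parameters. It is not taken from this commit: the `LiveServerless` configuration, endpoint name, and model ID are illustrative placeholders, and it assumes `remote` and `LiveServerless` are importable from the package root as the `__init__.py` exports above suggest.

# Hypothetical usage of the new accelerate_downloads / hf_models_to_cache parameters.
# The resource name and model ID are placeholders, not values from this commit.
from tetra_rp import LiveServerless, remote

gpu_config = LiveServerless(name="example-endpoint")  # illustrative config


@remote(
    resource_config=gpu_config,
    dependencies=["transformers", "torch"],
    accelerate_downloads=True,    # added in this commit; defaults to True
    hf_models_to_cache=["gpt2"],  # added in this commit; pre-caches HF models
)
async def generate(prompt: str) -> str:
    # Runs remotely; dependencies and cached models are provisioned beforehand.
    from transformers import pipeline

    return pipeline("text-generation", model="gpt2")(prompt)[0]["generated_text"]
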
src/tetra_rp/core/api/runpod.py

Lines changed: 24 additions & 0 deletions
@@ -281,6 +281,30 @@ async def create_network_volume(self, payload: Dict[str, Any]) -> Dict[str, Any]

         return result

+    async def list_network_volumes(self) -> Dict[str, Any]:
+        """
+        List all network volumes in Runpod.
+
+        Returns:
+            List of network volume objects or dict containing networkVolumes key.
+            The API may return either format depending on version.
+        """
+        log.debug("Listing network volumes")
+
+        result = await self._execute_rest(
+            "GET", f"{RUNPOD_REST_API_URL}/networkvolumes"
+        )
+
+        # Handle both list and dict responses
+        if isinstance(result, list):
+            volume_count = len(result)
+        else:
+            volume_count = len(result.get("networkVolumes", []))
+
+        log.debug(f"Listed {volume_count} network volumes")
+
+        return result
+
     async def close(self):
         """Close the HTTP session."""
         if self.session and not self.session.closed:

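A short sketch of consuming the new endpoint from calling code, handling the list-or-dict response shape the docstring describes. The client class name and import path are assumed from this file's location; the async-context-manager usage mirrors how `RunpodRestClient` is used in `network_volume.py` below.

# Sketch only: print basic info for each network volume.
# Assumes RunpodRestClient lives in tetra_rp.core.api.runpod and that a
# Runpod API key is configured in the environment.
import asyncio

from tetra_rp.core.api.runpod import RunpodRestClient  # assumed import path


async def show_network_volumes() -> None:
    async with RunpodRestClient() as client:
        result = await client.list_network_volumes()
        # Normalize the two documented response shapes: bare list, or dict
        # keyed by "networkVolumes".
        volumes = result if isinstance(result, list) else result.get("networkVolumes", [])
        for volume in volumes:
            print(volume.get("id"), volume.get("name"), volume.get("dataCenterId"))


if __name__ == "__main__":
    asyncio.run(show_network_volumes())
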
src/tetra_rp/core/resources/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -12,7 +12,7 @@
     CudaVersion,
 )
 from .template import PodTemplate
-from .network_volume import NetworkVolume
+from .network_volume import NetworkVolume, DataCenter


 __all__ = [
@@ -21,6 +21,7 @@
     "CpuInstanceType",
     "CpuServerlessEndpoint",
     "CudaVersion",
+    "DataCenter",
     "DeployableResource",
     "GpuGroup",
     "GpuType",

src/tetra_rp/core/resources/network_volume.py

Lines changed: 65 additions & 24 deletions
@@ -1,3 +1,4 @@
+import hashlib
 import logging
 from enum import Enum
 from typing import Optional
@@ -25,23 +26,33 @@ class DataCenter(str, Enum):

 class NetworkVolume(DeployableResource):
     """
-    NetworkVolume resource for creating and managing Runpod netowrk volumes.
+    NetworkVolume resource for creating and managing Runpod network volumes.

     This class handles the creation, deployment, and management of network volumes
-    that can be attached to serverless resources.
+    that can be attached to serverless resources. Supports idempotent deployment
+    where multiple volumes with the same name will reuse existing volumes.

     """

     # Internal fixed value
     dataCenterId: DataCenter = Field(default=DataCenter.EU_RO_1, frozen=True)

     id: Optional[str] = Field(default=None)
-    name: Optional[str] = None
-    size: Optional[int] = Field(default=10, gt=0)  # Size in GB
+    name: str
+    size: Optional[int] = Field(default=100, gt=0)  # Size in GB

     def __str__(self) -> str:
         return f"{self.__class__.__name__}:{self.id}"

+    @property
+    def resource_id(self) -> str:
+        """Unique resource ID based on name and datacenter for idempotent behavior."""
+        # Use name + datacenter to ensure idempotence
+        resource_type = self.__class__.__name__
+        config_key = f"{self.name}:{self.dataCenterId.value}"
+        hash_obj = hashlib.md5(f"{resource_type}:{config_key}".encode())
+        return f"{resource_type}_{hash_obj.hexdigest()}"
+
     @field_serializer("dataCenterId")
     def serialize_data_center_id(self, value: Optional[DataCenter]) -> Optional[str]:
         """Convert DataCenter enum to string."""
@@ -61,24 +72,57 @@ def url(self) -> str:
             raise ValueError("Network volume ID is not set")
         return f"{CONSOLE_BASE_URL}/user/storage"

-    async def create_network_volume(self) -> str:
+    def is_deployed(self) -> bool:
         """
-        Creates a network volume using the provided configuration.
-        Returns the volume ID.
+        Checks if the network volume resource is deployed and available.
         """
-        async with RunpodRestClient() as client:
-            # Create the network volume
-            payload = self.model_dump(exclude_none=True)
-            result = await client.create_network_volume(payload)
+        return self.id is not None
+
+    def _normalize_volumes_response(self, volumes_response) -> list:
+        """Normalize API response to list format."""
+        if isinstance(volumes_response, list):
+            return volumes_response
+        return volumes_response.get("networkVolumes", [])
+
+    def _find_matching_volume(self, existing_volumes: list) -> Optional[dict]:
+        """Find existing volume matching name and datacenter."""
+        for volume_data in existing_volumes:
+            if (
+                volume_data.get("name") == self.name
+                and volume_data.get("dataCenterId") == self.dataCenterId.value
+            ):
+                return volume_data
+        return None
+
+    async def _find_existing_volume(self, client) -> Optional["NetworkVolume"]:
+        """Check for existing volume with same name and datacenter."""
+        if not self.name:
+            return None
+
+        log.debug(f"Checking for existing network volume with name: {self.name}")
+        volumes_response = await client.list_network_volumes()
+        existing_volumes = self._normalize_volumes_response(volumes_response)
+
+        if matching_volume := self._find_matching_volume(existing_volumes):
+            log.info(
+                f"Found existing network volume: {matching_volume.get('id')} with name '{self.name}'"
+            )
+            # Update our instance with the existing volume's ID
+            self.id = matching_volume.get("id")
+            return self
+
+        return None
+
+    async def _create_new_volume(self, client) -> "NetworkVolume":
+        """Create a new network volume."""
+        log.debug(f"Creating new network volume: {self.name or 'unnamed'}")
+        payload = self.model_dump(exclude_none=True)
+        result = await client.create_network_volume(payload)

         if volume := self.__class__(**result):
             return volume

-    def is_deployed(self) -> bool:
-        """
-        Checks if the network volume resource is deployed and available.
-        """
-        return self.id is not None
+        raise ValueError("Deployment failed, no volume was created.")

     async def deploy(self) -> "DeployableResource":
         """
@@ -91,16 +135,13 @@ async def deploy(self) -> "DeployableResource":
             log.debug(f"{self} exists")
             return self

-        # Create the network volume
         async with RunpodRestClient() as client:
-            # Create the network volume
-            payload = self.model_dump(exclude_none=True)
-            result = await client.create_network_volume(payload)
-
-            if volume := self.__class__(**result):
-                return volume
+            # Check for existing volume first
+            if existing_volume := await self._find_existing_volume(client):
+                return existing_volume

-            raise ValueError("Deployment failed, no volume was created.")
+            # No existing volume found, create a new one
+            return await self._create_new_volume(client)

         except Exception as e:
             log.error(f"{self} failed to deploy: {e}")

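To make the idempotent behavior concrete, here is a hedged sketch of deploying two `NetworkVolume` instances that share a name: the second `deploy()` should find the existing volume via `_find_existing_volume` and reuse its ID instead of creating a duplicate. The volume name is a placeholder and the import path is assumed from the package layout above.

# Sketch of idempotent deployment after this commit; "shared-model-cache" is a placeholder.
import asyncio

from tetra_rp.core.resources import NetworkVolume  # assumed import path


async def main() -> None:
    # "name" is now required; "size" defaults to 100 GB after this change.
    first = await NetworkVolume(name="shared-model-cache").deploy()
    second = await NetworkVolume(name="shared-model-cache").deploy()

    # Both calls resolve to the same volume: the second deploy matches on
    # name + dataCenterId and reuses the existing ID.
    print(first.id, second.id, first.id == second.id)


if __name__ == "__main__":
    asyncio.run(main())
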