@@ -88,6 +88,8 @@ def __init__(
8888 raise DeploymentRuntimeConfigurationError ("endpoint_url is required" )
8989
9090 self ._session = None
91+ self ._health_checked = False
92+ self ._health_check_retries = 3
9193
9294 if not self .api_key :
9395 log .warning (
@@ -220,6 +222,9 @@ def decorator(cls):
220222 async def call_remote_method (self , request : FunctionRequest ) -> Any :
221223 """Call remote method via /execute endpoint."""
222224 try :
225+ # Ensure endpoint is healthy before making the request
226+ await self ._ensure_healthy ()
227+
223228 url = f"{ self .endpoint_url } /execute"
224229 payload = {"input" : request .model_dump (exclude_none = True )}
225230
@@ -261,6 +266,9 @@ async def call_remote_method(self, request: FunctionRequest) -> Any:
261266 async def call_http_endpoint (self , method_name : str , data : Dict [str , Any ]) -> Any :
262267 """Call HTTP endpoint directly."""
263268 try :
269+ # Ensure endpoint is healthy before making the request
270+ await self ._ensure_healthy ()
271+
264272 url = f"{ self .endpoint_url } /{ method_name } "
265273
266274 log .debug (f"HTTP call to { url } for method: { method_name } " )
@@ -279,17 +287,41 @@ async def call_http_endpoint(self, method_name: str, data: Dict[str, Any]) -> An
279287 {"method_name" : method_name , "error" : str (e )},
280288 )
281289
async def _ensure_healthy(self) -> None:
    """Verify, at most once per successful check, that the endpoint is healthy.

    Returns immediately when ``self._health_checked`` is already set.
    Otherwise ``self._perform_health_check()`` is awaited up to
    ``self._health_check_retries`` times. After a failed attempt that is
    not the last one, the coroutine sleeps ``1.0 * attempt_number``
    seconds before trying again. The first successful attempt sets
    ``self._health_checked`` so later calls skip the check.

    Note: if ``self._health_check_retries`` is zero or negative the loop
    body never runs, so no check is performed and the flag stays unset.

    Raises:
        DeploymentRuntimeConnectionError: when the final attempt fails.
            The underlying exception is attached as ``__cause__``.
    """
    if self._health_checked:
        return

    log.debug("Performing automatic health check...")

    for attempt in range(self._health_check_retries):
        try:
            await self._perform_health_check()
        except Exception as e:
            if attempt == self._health_check_retries - 1:
                log.error(
                    f"Health check failed after {self._health_check_retries} attempts: {e}"
                )
                # Chain explicitly so the traceback shows the real cause
                # of the connection error rather than incidental context.
                raise DeploymentRuntimeConnectionError(
                    self.endpoint_url,
                    f"Endpoint health check failed after {self._health_check_retries} attempts: {e}",
                    {'attempts': self._health_check_retries, 'last_error': str(e)},
                ) from e
            log.warning(f"Health check attempt {attempt + 1} failed, retrying...")
            await asyncio.sleep(1.0 * (attempt + 1))  # Progressive backoff
        else:
            # Only the health request itself is guarded by the try block;
            # success bookkeeping happens here.
            self._health_checked = True
            log.debug(f"Health check successful on attempt {attempt + 1}")
            return
314+
async def _perform_health_check(self) -> Dict[str, Any]:
    """Send one GET request to the endpoint's ``/health`` route.

    Returns:
        Whatever ``self._make_request_with_retry`` returns for the request.
    """
    health_url = f"{self.endpoint_url}/health"
    log.debug(f"Health check: {health_url}")
    response = await self._make_request_with_retry("GET", health_url, "health_check")
    return response
320+
282321 async def health_check (self ) -> Dict [str , Any ]:
283- """Check DeploymentRuntime health."""
322+ """Check DeploymentRuntime health (public method) ."""
284323 try :
285- url = f"{ self .endpoint_url } /health"
286-
287- log .debug (f"Health check: { url } " )
288-
289- result = await self ._make_request_with_retry ("GET" , url , "health_check" )
290-
291- return result
292-
324+ return await self ._perform_health_check ()
293325 except DeploymentRuntimeConnectionError :
294326 raise
295327 except Exception as e :
0 commit comments