Commit 29086f5

Merge pull request #272 from rhesis-ai/feature/add-llm-providers

Extend LLMService:
- LiteLLM support
- HuggingFace support

2 parents 2b8a32f + f63eb40

12 files changed: 950 additions & 56 deletions

sdk/pyproject.toml

Lines changed: 2 additions & 0 deletions
@@ -39,6 +39,8 @@ dependencies = [
     "tomli>=2.2.1",
     "tomli-w>=1.2.0",
     "litellm>=1.76.0",
+    "torch>=2.8.0",
+    "transformers>=4.56.0",
 ]
 
 [project.license]

sdk/src/rhesis/sdk/errors.py

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+# Error messages used in the SDK. This file makes it easier to find and edit the used Error messages
+
+# For this file too long lines are fine for better readability
+# flake8: noqa: E501
+
+
+# LLM Errors
+NO_MODEL_NAME_PROVIDED = "The model name is not valid. Please provide a non-empty string."
+HUGGINGFACE_MODEL_NOT_LOADED = "Hugging Face model is not loaded. Set auto_loading=True or load it manually using `load_model()`."
+MODEL_RELOAD_WARNING = "WARNING: The model {} is already loaded. It will be reloaded."
+WARNING_TOKENIZER_ALREADY_LOADED_RELOAD = (
+    "WARNING: The tokenizer for model {} is already loaded. It will be reloaded."
+)
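
The two WARNING constants are format templates with a single {} placeholder for the model name, while the plain messages are passed to exceptions unchanged. A minimal sketch of the intended use (the model name below is only illustrative):

from rhesis.sdk.errors import MODEL_RELOAD_WARNING, NO_MODEL_NAME_PROVIDED

# Templates carry a single {} placeholder for the model name.
print(MODEL_RELOAD_WARNING.format("crumb/nano-mistral"))

# The non-template messages are raised verbatim by the providers, e.g.:
# raise ValueError(NO_MODEL_NAME_PROVIDED)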

sdk/src/rhesis/sdk/models/__init__.py

Lines changed: 3 additions & 1 deletion
@@ -1,5 +1,7 @@
 from rhesis.sdk.models.providers.gemini import GeminiLLM
+from rhesis.sdk.models.providers.huggingface import HuggingFaceLLM
+from rhesis.sdk.models.providers.litellm import LiteLLM
 from rhesis.sdk.models.providers.native import RhesisLLM
 from rhesis.sdk.models.providers.openai import OpenAILLM
 
-__all__ = ["RhesisLLM", "GeminiLLM", "OpenAILLM"]
+__all__ = ["RhesisLLM", "HuggingFaceLLM", "LiteLLM", "GeminiLLM", "OpenAILLM"]
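
With the extended `__all__`, the new providers are importable directly from `rhesis.sdk.models` alongside the existing ones; a minimal sketch (the model strings are placeholders):

from rhesis.sdk.models import GeminiLLM, HuggingFaceLLM, LiteLLM, OpenAILLM, RhesisLLM

remote = LiteLLM("provider/model")            # any provider/model string LiteLLM accepts
local = HuggingFaceLLM("crumb/nano-mistral")  # loads the model and tokenizer eagerly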

sdk/src/rhesis/sdk/models/providers/gemini.py

Lines changed: 5 additions & 34 deletions
@@ -9,21 +9,15 @@
 
 """
 
-import json
 import os
-from typing import Optional, Union
 
-from litellm import completion
-from pydantic import BaseModel
-
-from rhesis.sdk.models.base import BaseLLM
-from rhesis.sdk.models.utils import validate_llm_response
+from rhesis.sdk.models.providers.litellm import LiteLLM
 
 PROVIDER = "gemini"
 DEFAULT_MODEL_NAME = "gemini-2.0-flash"
 
 
-class GeminiLLM(BaseLLM):
+class GeminiLLM(LiteLLM):
     def __init__(self, model_name: str = DEFAULT_MODEL_NAME, api_key=None, **kwargs):
         """
         GeminiLLM: Google Gemini LLM Provider
@@ -47,30 +41,7 @@ def __init__(self, model_name: str = DEFAULT_MODEL_NAME, api_key=None, **kwargs)
         Raises:
             ValueError: If the API key is not set.
         """
-        self.api_key = api_key or os.getenv("GEMINI_API_KEY")
-        if self.api_key is None:
+        api_key = api_key or os.getenv("GEMINI_API_KEY")
+        if api_key is None:
             raise ValueError("GEMINI_API_KEY is not set")
-        super().__init__(model_name)
-
-    def load_model(self, *args, **kwargs):
-        return None  # LiteLLM handles model loading internally
-
-    def generate(
-        self, prompt: str, schema: Optional[BaseModel] = None, *args, **kwargs
-    ) -> Union[str, dict]:
-        messages = [{"role": "user", "content": prompt}]
-        response = completion(
-            model=f"{PROVIDER}/{self.model_name}",
-            messages=messages,
-            response_format=schema,
-            api_key=self.api_key,
-            *args,
-            **kwargs,
-        )
-        response_content = response.choices[0].message.content
-        if schema:
-            response_content = json.loads(response_content)
-            validate_llm_response(response_content, schema)
-            return response_content
-        else:
-            return response_content
+        super().__init__(PROVIDER + "/" + model_name, api_key=api_key)
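
GeminiLLM is now a thin wrapper that prefixes the model name with "gemini/" and delegates everything else to LiteLLM. A minimal sketch, assuming GEMINI_API_KEY is set in the environment (or passed via api_key=):

from rhesis.sdk.models import GeminiLLM

llm = GeminiLLM()  # defaults to "gemini-2.0-flash", resolved to "gemini/gemini-2.0-flash"
print(llm.generate("Tell me a joke.", system_prompt="You are funny"))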

sdk/src/rhesis/sdk/models/providers/huggingface.py

Lines changed: 175 additions & 0 deletions
@@ -0,0 +1,175 @@
+import gc
+from typing import Optional
+
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+from rhesis.sdk.errors import (
+    HUGGINGFACE_MODEL_NOT_LOADED,
+    MODEL_RELOAD_WARNING,
+    NO_MODEL_NAME_PROVIDED,
+    WARNING_TOKENIZER_ALREADY_LOADED_RELOAD,
+)
+from rhesis.sdk.models.base import BaseLLM
+
+
+class HuggingFaceLLM(BaseLLM):
+    """
+    A standard implementation of a model available on Hugging Face's model hub.
+    This class provides a basic structure for loading and using models from Hugging Face.
+    It can be extended to include specific models or configurations as needed.
+    A complete implementation may be needed for unusual models or configurations.
+    Example usage:
+        >>> llm = HuggingFaceLLM("crumb/nano-mistral")
+        >>> result = llm.generate("Tell me a joke.")
+        >>> print(result)
+    """
+
+    def __init__(
+        self, model_name: str, auto_loading: bool = True, default_kwargs: Optional[dict] = None
+    ):
+        """
+        Initialize the model with the given name and location.
+        Args:
+            model_name: The location to pull the model from
+            auto_loading: Whether to automatically load the model on initialization.
+                If turned off, manual loading is needed. Allows lazy loading.
+        """
+        if not model_name or not isinstance(model_name, str) or model_name.strip() == "":
+            raise ValueError(NO_MODEL_NAME_PROVIDED)
+
+        self.model_name = model_name
+        self.default_kwargs = default_kwargs
+
+        self.model = None
+        self.tokenizer = None
+        self.device = None
+
+        if auto_loading:
+            (self.model, self.tokenizer, self.device) = self.load_model()
+
+    def __del__(self):
+        """
+        If the model or tokenizer is loaded, unload them to free up resources.
+        Unloading manually is cleaner, but this is a fallback.
+        """
+        if self.model is not None or self.tokenizer is not None:
+            self.unload_model()
+
+    def load_model(self):
+        """
+        Load the model and tokenizer from the specified location.
+        """
+        if self.model is not None:
+            print(MODEL_RELOAD_WARNING.format(self.model_name))
+        if self.tokenizer is not None:
+            print(WARNING_TOKENIZER_ALREADY_LOADED_RELOAD.format(self.model_name))
+
+        model = AutoModelForCausalLM.from_pretrained(
+            self.model_name,
+        )
+
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        model.to(device)
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            self.model_name,
+        )
+
+        return model, tokenizer, device
+
+    def unload_model(self):
+        """
+        Aggressively unload the model and tokenizer to free up GPU/CPU memory.
+        This handles edge cases such as partial allocations and hanging references.
+        """
+        # Unload model
+        try:
+            if self.model is not None:
+                try:
+                    self.model.cpu()
+                except Exception:
+                    pass
+                try:
+                    # Clear state_dict if available
+                    if hasattr(self.model, "state_dict"):
+                        sd = self.model.state_dict()
+                        for k in list(sd.keys()):
+                            sd.pop(k)
+                        del sd
+                except Exception:
+                    pass
+                del self.model
+                self.model = None
+        except Exception:
+            pass
+
+        # Unload tokenizer
+        try:
+            if self.tokenizer is not None:
+                try:
+                    if hasattr(self.tokenizer, "backend_tokenizer"):
+                        self.tokenizer.backend_tokenizer = None
+                except Exception:
+                    pass
+                del self.tokenizer
+                self.tokenizer = None
+        except Exception:
+            pass
+
+        # Force cleanup
+        torch.cuda.empty_cache()
+        gc.collect()
+        torch.cuda.empty_cache()
+
+    def generate(
+        self,
+        prompt: str,
+        system_prompt: Optional[str] = None,
+        **kwargs,
+    ) -> str:
+        """
+        Generate a response from the model
+        """
+
+        # check model and tokenizer
+        if self.model is None or self.tokenizer is None:
+            raise RuntimeError(HUGGINGFACE_MODEL_NOT_LOADED)
+
+        # format arguments
+        if self.default_kwargs:
+            kwargs = {**self.default_kwargs, **kwargs}
+
+        if hasattr(self.tokenizer, "chat_template") and self.tokenizer.chat_template is not None:
+            messages = (
+                [
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": prompt},
+                ]
+                if system_prompt
+                else [
+                    {"role": "user", "content": prompt},
+                ]
+            )
+            inputs = self.tokenizer.apply_chat_template(
+                messages, add_generation_prompt=True, return_dict=True, return_tensors="pt"
+            ).to(self.device)
+        else:
+            messages = f"{system_prompt}\n\n{prompt}" if system_prompt else prompt
+            inputs = self.tokenizer(messages, return_tensors="pt").to(self.device)
+
+        # generate response
+        output_ids = self.model.generate(
+            **inputs,
+            pad_token_id=self.tokenizer.eos_token_id,
+            eos_token_id=self.tokenizer.eos_token_id,
+            **kwargs,
+        )
+
+        completion = self.tokenizer.decode(
+            output_ids[0][inputs["input_ids"].shape[1] :],  # only take the newly generated content
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=True,
+        ).strip()
+
+        return completion
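
A hedged usage sketch for the class above. The eager path mirrors the docstring example; the lazy path is an assumption drawn from __init__, since load_model() returns the (model, tokenizer, device) tuple rather than setting the attributes itself, and max_new_tokens is only an illustrative transformers generation argument:

from rhesis.sdk.models import HuggingFaceLLM

# Eager loading (default): model, tokenizer and device are set during __init__.
llm = HuggingFaceLLM("crumb/nano-mistral", default_kwargs={"max_new_tokens": 64})
print(llm.generate("Tell me a joke.", system_prompt="You are funny"))

# Lazy loading: generate() raises RuntimeError until the model is loaded.
lazy = HuggingFaceLLM("crumb/nano-mistral", auto_loading=False)
lazy.model, lazy.tokenizer, lazy.device = lazy.load_model()
print(lazy.generate("Tell me a joke."))

lazy.unload_model()  # frees GPU/CPU memory explicitly instead of waiting for __del__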

sdk/src/rhesis/sdk/models/providers/litellm.py

Lines changed: 78 additions & 0 deletions
@@ -0,0 +1,78 @@
+import json
+from typing import Optional
+
+from litellm import completion
+from pydantic import BaseModel
+
+from rhesis.sdk.errors import NO_MODEL_NAME_PROVIDED
+from rhesis.sdk.models.base import BaseLLM
+from rhesis.sdk.models.utils import validate_llm_response
+
+
+class LiteLLM(BaseLLM):
+    def __init__(self, model_name: str, api_key: Optional[str] = None):
+        """
+        LiteLLM: LiteLLM Provider for Model inference
+
+        This class provides an interface for interacting with all models accessible through LiteLLM.
+
+        Args:
+            model_name (str): The name of the model to use including the provider.
+            api_key (Optional[str]): The API key for authentication.
+                If not provided, LiteLLM will handle it internally.
+
+        Usage:
+            >>> llm = LiteLLM(model_name="provider/model", api_key="your_api_key")
+            >>> result = llm.generate(prompt="Tell me a joke.", system_prompt="You are funny")
+            >>> print(result)
+
+        If a Pydantic schema is provided to `generate`, the response will be validated and returned
+        as a dict.
+        """
+        self.api_key = api_key  # LiteLLM will handle Environment Retrieval
+        if not model_name or not isinstance(model_name, str) or model_name.strip() == "":
+            raise ValueError(NO_MODEL_NAME_PROVIDED)
+        super().__init__(model_name)
+
+    def load_model(self):
+        """
+        LiteLLM handles model loading internally, so no loading is needed
+        """
+        pass
+
+    def generate(
+        self,
+        prompt: str,
+        system_prompt: Optional[str] = None,
+        schema: Optional[BaseModel] = None,
+        *args,
+        **kwargs,
+    ):
+        """
+        Run a chat completion using LiteLLM, returning the response.
+        The schema will be used to validate the response if provided.
+        """
+        # handle system prompt
+        messages = (
+            [{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}]
+            if system_prompt
+            else [{"role": "user", "content": prompt}]
+        )
+
+        # Call the completion function passing given arguments
+        response = completion(
+            model=self.model_name,
+            messages=messages,
+            response_format=schema,
+            api_key=self.api_key,
+            *args,
+            **kwargs,
+        )
+
+        response_content = response.choices[0].message.content
+        if schema:
+            response_content = json.loads(response_content)
+            validate_llm_response(response_content, schema)
+            return response_content
+        else:
+            return response_content
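
A sketch of the structured-output path described in the docstring: when a Pydantic schema is passed, the completion is parsed with json.loads, validated, and returned as a dict. The model string and schema below are illustrative:

from pydantic import BaseModel

from rhesis.sdk.models import LiteLLM


class Joke(BaseModel):
    setup: str
    punchline: str


llm = LiteLLM("gemini/gemini-2.0-flash")  # any "provider/model" string LiteLLM accepts
result = llm.generate("Tell me a joke.", schema=Joke)
print(result["punchline"])  # result is a validated dict, not a raw string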

sdk/src/rhesis/sdk/models/providers/openai.py

Lines changed: 12 additions & 13 deletions
@@ -1,18 +1,17 @@
-from litellm import completion
+"""
+Only partially supported. No testing done yet
+"""
 
-from rhesis.sdk.models.base import BaseLLM
+import os
 
+from rhesis.sdk.models.providers.litellm import LiteLLM
 
-class OpenAILLM(BaseLLM):
-    def load_model(self, *args, **kwargs):
-        return None  # LiteLLM handles model loading internally
+DEFAULT_MODEL_NAME = "gpt-4"
 
-    def generate(self, prompt: str, *args, **kwargs) -> str:
-        messages = [{"role": "user", "content": prompt}]
-        response = completion(model=self.model_name, messages=messages, *args, **kwargs)
-        return response.choices[0].message.content
 
-
-if __name__ == "__main__":
-    openai = OpenAILLM(model_name="gpt-4")
-    print(openai.generate("Hello, how are you?"))
+class OpenAILLM(LiteLLM):
+    def __init__(self, model_name=DEFAULT_MODEL_NAME, api_key=None):
+        api_key = api_key or os.getenv("OPENAI_API_KEY")
+        if api_key is None:
+            raise ValueError("OPENAI_API_KEY is not set")
+        super().__init__(model_name, api_key=api_key)
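
The OpenAI provider becomes another thin LiteLLM wrapper (flagged above as only partially supported). A minimal sketch, assuming OPENAI_API_KEY is set:

from rhesis.sdk.models import OpenAILLM

llm = OpenAILLM()  # defaults to "gpt-4"; raises ValueError if OPENAI_API_KEY is unset
print(llm.generate("Hello, how are you?"))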
