implement changes, add comments with todo

akwasigroch · akwasigroch · commit 45151cac6315 · 2025-09-05T19:01:34.000+02:00
diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml
@@ -38,8 +38,7 @@ dependencies = [
     "markitdown[pdf,docx,pptx,xlsx]>=0.1.0",
     "tomli>=2.2.1",
     "tomli-w>=1.2.0",
-    "deepeval>=3.0.0",
-    "mirascope>=1.24.0",
+    "deepeval==3.4.7",
     "tenacity>=8.2.3",
     "pyyaml>=6.0",
     "pydantic>=2.0.0",
diff --git a/sdk/src/rhesis/sdk/metrics/__init__.py b/sdk/src/rhesis/sdk/metrics/__init__.py
@@ -5,17 +5,6 @@
     RhesisMetricFactory,
     RhesisPromptMetric,
 )
-
-# Lazy import to avoid circular dependencies
-# from .deepeval import (  # Re-export DeepEval metrics
-#     DeepEvalMetricBase,
-#     DeepEvalMetricFactory,
-#     DeepEvalAnswerRelevancy,
-#     DeepEvalFaithfulness,
-#     DeepEvalContextualRelevancy,
-#     DeepEvalContextualPrecision,
-#     DeepEvalContextualRecall,
-# )
 from rhesis.sdk.metrics.providers.ragas import (  # Re-export Ragas metrics
     RagasAnswerRelevancy,
     RagasContextualPrecision,
@@ -52,14 +41,6 @@
     "RhesisMetricBase",
     "RhesisMetricFactory",
     "RhesisPromptMetric",
-    # DeepEval metrics (commented out to avoid circular imports)
-    # "DeepEvalMetricBase",
-    # "DeepEvalMetricFactory",
-    # "DeepEvalAnswerRelevancy",
-    # "DeepEvalFaithfulness",
-    # "DeepEvalContextualRelevancy",
-    # "DeepEvalContextualPrecision",
-    # "DeepEvalContextualRecall",
     # Ragas metrics
     "RagasMetricBase",
     "RagasMetricFactory",
diff --git a/sdk/src/rhesis/sdk/metrics/base.py b/sdk/src/rhesis/sdk/metrics/base.py
@@ -1,3 +1,22 @@
+"""
+
+TODO:
+The key names below are repeated throughout the class as string literals. Can we optimize this?
+
+
+# Extract all other keys as custom parameters
+reserved_keys = {
+    "class_name",
+    "backend",
+    "threshold",
+    "reference_score",
+    "threshold_operator",
+    "description",
+    "name",
+}
+Also, the method retry_evaluation might be better placed in a utils-type module?
+"""
+
 from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
 from functools import wraps
diff --git a/sdk/src/rhesis/sdk/metrics/evaluator.py b/sdk/src/rhesis/sdk/metrics/evaluator.py
@@ -1,3 +1,4 @@
+# TODO - move this file to the backend
 import concurrent.futures
 import logging
 from typing import Any, Dict, List, Optional, Tuple, Union
diff --git a/sdk/src/rhesis/sdk/metrics/providers/native/README.md b/sdk/src/rhesis/sdk/metrics/providers/native/README.md
@@ -10,7 +10,7 @@ A generic metric that evaluates outputs based on a custom prompt template. It us
 
 #### Features:
 - Supports customizable evaluation prompts, steps, and reasoning
-- Uses Mirascope's structured response models for robust parsing
+- Uses structured response models (Pydantic and JSON Schema) for robust parsing
 - Provides both raw and normalized scores
 - Includes detailed evaluation reasoning
 
diff --git a/sdk/src/rhesis/sdk/metrics/utils.py b/sdk/src/rhesis/sdk/metrics/utils.py
@@ -1,3 +1,4 @@
+## TODO - move this file to the backend
 from typing import Any, Dict, List, Optional, Union
 
 from rhesis.sdk.metrics.base import MetricConfig
diff --git a/sdk/uv.lock b/sdk/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+# TODO - move this file to the backend`
`1`	`2`	`import concurrent.futures`
`2`	`3`	`import logging`
`3`	`4`	`from typing import Any, Dict, List, Optional, Tuple, Union`
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+## TODO - move this file to the backend`
`1`	`2`	`from typing import Any, Dict, List, Optional, Union`
`2`	`3`
`3`	`4`	`from rhesis.sdk.metrics.base import MetricConfig`