paucablop
diff --git a/‎chemotools/outliers/_base.py
Lines changed: 75 additions & 67 deletions b/‎chemotools/outliers/_base.py
Lines changed: 75 additions & 67 deletions
diff --git a/‎chemotools/outliers/_utils.py
Lines changed: 0 additions & 91 deletions b/‎chemotools/outliers/_utils.py
Lines changed: 0 additions & 91 deletions
diff --git a/‎chemotools/outliers/dmodx.py
Lines changed: 26 additions & 8 deletions b/‎chemotools/outliers/dmodx.py
Lines changed: 26 additions & 8 deletions
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Union, Optional
+from typing import Optional, Tuple, Union
 
 import numpy as np
 
@@ -9,7 +9,6 @@
 from sklearn.pipeline import Pipeline
 from sklearn.utils.validation import check_is_fitted
 
-from ._utils import validate_confidence, validate_and_extract_model
 
 ModelTypes = Union[_BasePCA, _PLS]
 
@@ -29,10 +28,10 @@ class _ModelResidualsBase(ABC, BaseEstimator, OutlierMixin):
 
     Attributes
     ----------
-    model_ : ModelTypes
+    estimator_ : ModelTypes
         The fitted model of type _BasePCA or _PLS
 
-    preprocessing_ : Optional[Pipeline]
+    transformer_ : Optional[Pipeline]
         Preprocessing steps before the model
 
     n_features_in_ : int
@@ -54,13 +53,13 @@ def __init__(
         confidence: float,
     ) -> None:
         (
-            self.model_,
-            self.preprocessing_,
+            self.estimator_,
+            self.transformer_,
             self.n_features_in_,
             self.n_components_,
             self.n_samples_,
-        ) = validate_and_extract_model(model)
-        self.confidence = validate_confidence(confidence)
+        ) = _validate_and_extract_model(model)
+        self.confidence = _validate_confidence(confidence)
 
     def fit_predict_residuals(
         self, X: np.ndarray, y: Optional[np.ndarray] = None
@@ -96,7 +95,7 @@ def predict_residuals(
         """
 
     @abstractmethod
-    def _calculate_critical_value(self, X: Optional[np.ndarray]) -> float:
+    def _calculate_critical_value(self, X: np.ndarray) -> float:
         """Calculate the critical value for outlier detection.
 
         Returns
@@ -106,75 +105,84 @@ def _calculate_critical_value(self, X: Optional[np.ndarray]) -> float:
         """
 
 
-class _ModelDiagnosticsBase(ABC):
-    """Base class for model diagnostics methods. This does not implement outlier detection algorithms,
-    but rather implements methods that are used to assess trained models.
+def _get_model_parameters(model: ModelTypes) -> Tuple[int, int, int]:
+    """
+    Get the number of features, components and samples from a fitted PCA or PLS model.
 
     Parameters
     ----------
-    model : Union[ModelTypes, Pipeline]
-        A fitted PCA/PLS model or Pipeline ending with such a model
-
-    Attributes
-    ----------
-    model_ : ModelTypes
-        The fitted model of type _BasePCA or _PLS
-
-    preprocessing_ : Optional[Pipeline]
-        Preprocessing steps before the model
+    model : ModelTypes
+        A fitted model of type _BasePCA or _PLS
 
+    Returns
+    -------
+    Tuple[int, int, int]
+        The number of features, components and samples in the model
     """
+    if isinstance(model, _BasePCA):
+        return model.n_features_in_, model.n_components_, model.n_samples_
+    elif isinstance(model, _PLS):
+        return model.n_features_in_, model.n_components, len(model.x_scores_)
+    else:
+        raise ValueError(
+            "Model not a valid model. Must be of base type _BasePCA or _PLS or a Pipeline ending with one of these types."
+        )
 
-    def __init__(self, model: Union[ModelTypes, Pipeline]):
-        self.model_, self.preprocessing_ = self._validate_and_extract_model(model)
 
-    def _validate_and_extract_model(self, model):
-        """Validate and extract the model and preprocessing steps.
+def _validate_confidence(confidence: float) -> float:
+    """Validate that the confidence level lies strictly between 0 and 1.
 
-        Parameters
-        ----------
-        model : Union[ModelTypes, Pipeline]
-            A fitted PCA/PLS model or Pipeline ending with such a model
+    Parameters
+    ----------
+    confidence : float
+        Confidence level for statistical calculations (between 0 and 1)
 
-        Returns
-        -------
-        Tuple[ModelTypes, Optional[Pipeline]]
-            The extracted model and preprocessing steps
+    Returns
+    -------
+    float
+        The validated confidence level
 
-        Raises
-        ------
-        ValueError
-            If the model is not of type _BasePCA or _PLS or a Pipeline ending with one of these types or if the model is not fitted
-        """
-        if isinstance(model, Pipeline):
-            preprocessing = model[:-1]
-            model = model[-1]
-        else:
-            preprocessing = None
-
-        if isinstance(model, (_BasePCA, _PLS)):
-            check_is_fitted(model)
-        else:
-            raise ValueError(
-                "Model not a valid model. Must be of base type _BasePCA or _PLS or a Pipeline ending with one of these types."
-            )
-        check_is_fitted(model)
-        return model, preprocessing
+    Raises
+    ------
+    ValueError
+        If confidence is not strictly between 0 and 1 (both bounds are excluded)
+    """
+    if not 0 < confidence < 1:
+        raise ValueError("Confidence must be between 0 and 1")
+    return confidence
 
-    @abstractmethod
-    def predict(self, X: np.ndarray, y: Optional[np.ndarray]) -> np.ndarray:
-        """Predict the output of the model.
 
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_features)
-            Input data
+def _validate_and_extract_model(
+    model: Union[ModelTypes, Pipeline],
+) -> Tuple[ModelTypes, Optional[Pipeline], int, int, int]:
+    """Validate and extract the model and preprocessing steps.
 
-        y : array-like of shape (n_samples,), default=None
-            Target values
+    Parameters
+    ----------
+    model : Union[ModelTypes, Pipeline]
+        A fitted PCA/PLS model or Pipeline ending with such a model
 
-        Returns
-        -------
-        ndarray of shape (n_samples,)
-            Predicted values
-        """
+    Returns
+    -------
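+    Tuple[ModelTypes, Optional[Pipeline], int, int, int]
+        The extracted model, the preprocessing steps (None if no Pipeline was given), and the number of features, components and samples of the model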
+
+    Raises
+    ------
+    ValueError
+        If the model is not of type _BasePCA or _PLS or a Pipeline ending with one of these types or if the model is not fitted
+    """
+    if isinstance(model, Pipeline):
+        preprocessing = model[:-1]
+        model = model[-1]
+    else:
+        preprocessing = None
+
+    if not isinstance(model, (_BasePCA, _PLS)):
+        raise ValueError(
+            "Model not a valid model. Must be of base type _BasePCA or _PLS or a Pipeline ending with one of these types."
+        )
+
+    check_is_fitted(model)
+    n_features_in, n_components, n_samples = _get_model_parameters(model)
+    return model, preprocessing, n_features_in, n_components, n_samples
@@ -7,6 +7,7 @@
 
 
 from ._base import _ModelResidualsBase, ModelTypes
+from .utils import calculate_residual_spectrum
 
 
 class DModX(_ModelResidualsBase):
@@ -25,10 +26,10 @@ class DModX(_ModelResidualsBase):
 
     Attributes
     ----------
-    model_ : ModelType
+    estimator_ : ModelTypes
         The fitted model of type _BasePCA or _PLS
 
-    preprocessing_ : Optional[Pipeline]
+    transformer_ : Optional[Pipeline]
         Preprocessing steps before the model
 
     n_features_in_ : int
@@ -42,13 +43,17 @@ class DModX(_ModelResidualsBase):
 
     critical_value_ : float
         The calculated critical value for outlier detection
+
+    train_spe_ : ndarray
+        Square root of the per-row sum of squared training residuals, scaled by (n_features_in_ - n_components_) / (n_samples_ - n_components_ - 1)
     """
 
     def __init__(
         self,
         model: Union[ModelTypes, Pipeline],
         confidence: float = 0.95,
     ) -> None:
+        model, confidence = model, confidence
         super().__init__(model, confidence)
 
     def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "DModX":
@@ -62,7 +67,18 @@ def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "DModX":
             self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
         )
 
+        # Calculate the critical value
         self.critical_value_ = self._calculate_critical_value()
+
+        # Calculate the degrees of freedom normalized SPE of the training set
+        residuals = calculate_residual_spectrum(X, self.estimator_)
+        squared_errors = np.sum((residuals) ** 2, axis=1)
+        self.train_spe_ = np.sqrt(
+            squared_errors
+            / (self.n_samples_ - self.n_components_ - 1)
+            * (self.n_features_in_ - self.n_components_)
+        )
+
         return self
 
     def predict(self, X: np.ndarray) -> np.ndarray:
@@ -118,15 +134,17 @@ def predict_residuals(
             )
 
         # Apply preprocessing if available
-        if self.preprocessing_:
-            X = self.preprocessing_.transform(X)
+        if self.transformer_:
+            X = self.transformer_.transform(X)
 
         # Calculate the DModX statistics
-        X_transformed = self.model_.transform(X)
-        X_reconstructed = self.model_.inverse_transform(X_transformed)
-        squared_errors = np.sum((X - X_reconstructed) ** 2, axis=1)
+        residual = calculate_residual_spectrum(X, self.estimator_)
+        squared_errors = np.sum((residual) ** 2, axis=1)
 
-        return np.sqrt(squared_errors / (self.n_features_in_ - self.n_components_))
+        return (
+            np.sqrt(squared_errors / (self.n_features_in_ - self.n_components_))
+            / self.train_spe_
+        )
 
     def _calculate_critical_value(self, X: Optional[np.ndarray] = None) -> float:
         """Calculate F-distribution based critical value.