paucablop
diff --git a/chemotools/augmentation/__init__.py
Lines changed: 2 additions & 0 deletions b/chemotools/augmentation/__init__.py
Lines changed: 2 additions & 0 deletions
diff --git a/chemotools/augmentation/_gaussian_broadening.py
Lines changed: 136 additions & 0 deletions b/chemotools/augmentation/_gaussian_broadening.py
Lines changed: 136 additions & 0 deletions
diff --git a/chemotools/outliers/__init__.py
Lines changed: 7 additions & 0 deletions b/chemotools/outliers/__init__.py
Lines changed: 7 additions & 0 deletions
diff --git a/chemotools/outliers/_base.py
Lines changed: 180 additions & 0 deletions b/chemotools/outliers/_base.py
Lines changed: 180 additions & 0 deletions
@@ -1,6 +1,7 @@
 from ._add_noise import AddNoise
 from ._baseline_shift import BaselineShift
 from ._fractional_shift import FractionalShift
+from ._gaussian_broadening import GaussianBroadening
 from ._index_shift import IndexShift
 from ._spectrum_scale import SpectrumScale
 
@@ -9,6 +10,7 @@
     "AddNoise",
     "BaselineShift",
     "FractionalShift",
+    "GaussianBroadening",
     "IndexShift",
     "SpectrumScale",
 ]
@@ -0,0 +1,136 @@
+from typing import Literal, Optional
+import numpy as np
+from scipy.ndimage import gaussian_filter1d
+from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
+from sklearn.utils.validation import check_is_fitted, validate_data
+
+
+class GaussianBroadening(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
+    """
+    Transform spectral data by broadening peaks using Gaussian convolution.
+
+    This transformer applies Gaussian smoothing to broaden peaks in spectral data.
+    For each signal, a random sigma is chosen between 0 and the specified sigma value.
+
+    Parameters
+    ----------
+    sigma : float, default=1.0
+        Maximum standard deviation for the Gaussian kernel.
+        The actual sigma used will be randomly chosen between 0 and this value.
+
+    mode : {'reflect', 'constant', 'nearest', 'mirror', 'wrap'}, default='reflect'
+        The mode parameter determines how the input array is extended when
+        the filter overlaps a border. Default is 'reflect'.
+
+    pad_value : float, default=0.0
+        Value to fill past edges of input if mode is 'constant'.
+
+    random_state : int, optional, default=None
+        Random state for reproducible sigma selection.
+
+    truncate : float, default=4.0
+        Truncate the filter at this many standard deviations.
+        Larger values increase computation time but improve accuracy.
+    """
+
+    def __init__(
+        self,
+        sigma: float = 1.0,
+        mode: Literal["reflect", "constant", "nearest", "mirror", "wrap"] = "reflect",
+        pad_value: float = 0.0,
+        random_state: Optional[int] = None,
+        truncate: float = 4.0,
+    ):
+        self.sigma = sigma
+        self.mode = mode
+        self.pad_value = pad_value
+        self.random_state = random_state
+        self.truncate = truncate
+
+    def fit(self, X: np.ndarray, y=None) -> "GaussianBroadening":
+        """
+        Fit the transformer to the data (in this case, only validates input).
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Input data to validate.
+
+        y : None
+            Ignored.
+
+        Returns
+        -------
+        self : GaussianBroadening
+            The fitted transformer.
+        """
+        X = validate_data(
+            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+        )
+
+        # Validate sigma parameter
+        if not isinstance(self.sigma, (int, float)):
+            raise ValueError("sigma must be a number")
+        if self.sigma < 0:
+            raise ValueError("sigma must be non-negative")
+
+        # Initialize random number generator
+        self._rng = np.random.default_rng(self.random_state)
+
+        return self
+
+    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
+        """
+        Apply Gaussian broadening to the input data.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            The data to transform.
+
+        y : None
+            Ignored.
+
+        Returns
+        -------
+        X_transformed : ndarray of shape (n_samples, n_features)
+            The transformed data with broadened peaks.
+        """
+        check_is_fitted(self, "n_features_in_")
+        X_ = validate_data(
+            self,
+            X,
+            y="no_validation",
+            ensure_2d=True,
+            copy=True,
+            reset=False,
+            dtype=np.float64,
+        )
+
+        # Transform each sample
+        for i, x in enumerate(X_):
+            X_[i] = self._broaden_signal(x)
+
+        return X_
+
+    def _broaden_signal(self, x: np.ndarray) -> np.ndarray:
+        """
+        Apply Gaussian broadening to a single signal.
+
+        Parameters
+        ----------
+        x : ndarray of shape (n_features,)
+            The input signal to broaden.
+
+        Returns
+        -------
+        broadened_signal : ndarray of shape (n_features,)
+            The broadened signal.
+        """
+        # Randomly choose sigma between 0 and max sigma
+        sigma = self._rng.uniform(0, self.sigma)
+
+        # Apply Gaussian filter
+        return gaussian_filter1d(
+            x, sigma=sigma, mode=self.mode, cval=self.pad_value, truncate=self.truncate
+        )
@@ -0,0 +1,7 @@
+from .dmodx import DModX
+from .hotelling_t2 import HotellingT2
+from .q_residuals import QResiduals
+from .leverage import Leverage
+from .studentized_residuals import StudentizedResiduals
+
+__all__ = ["DModX", "HotellingT2", "QResiduals", "Leverage", "StudentizedResiduals"]
@@ -0,0 +1,180 @@
+from abc import ABC, abstractmethod
+from typing import Union, Optional
+
+import numpy as np
+
+from sklearn.base import BaseEstimator, OutlierMixin
+from sklearn.decomposition._base import _BasePCA
+from sklearn.cross_decomposition._pls import _PLS
+from sklearn.pipeline import Pipeline
+from sklearn.utils.validation import check_is_fitted
+
+from ._utils import validate_confidence, validate_and_extract_model
+
+ModelTypes = Union[_BasePCA, _PLS]
+
+
+class _ModelResidualsBase(ABC, BaseEstimator, OutlierMixin):
+    """Base class for model outlier calculations.
+
+    Implements statistical calculations for outlier detection in dimensionality
+    reduction models like PCA and PLS.
+
+    Parameters
+    ----------
+    model : Union[ModelTypes, Pipeline]
+        A fitted _BasePCA or _PLS models or Pipeline ending with such a model
+    confidence : float
+        Confidence level for statistical calculations (between 0 and 1)
+
+    Attributes
+    ----------
+    model_ : ModelTypes
+        The fitted model of type _BasePCA or _PLS
+
+    preprocessing_ : Optional[Pipeline]
+        Preprocessing steps before the model
+
+    n_features_in_ : int
+        Number of features in the input data
+
+    n_components_ : int
+        Number of components in the model
+
+    n_samples_ : int
+        Number of samples used to train the model
+
+    critical_value_ : float
+        The calculated critical value for outlier detection
+    """
+
+    def __init__(
+        self,
+        model: Union[ModelTypes, Pipeline],
+        confidence: float,
+    ) -> None:
+        (
+            self.model_,
+            self.preprocessing_,
+            self.n_features_in_,
+            self.n_components_,
+            self.n_samples_,
+        ) = validate_and_extract_model(model)
+        self.confidence = validate_confidence(confidence)
+
+    def fit_predict_residuals(
+        self, X: np.ndarray, y: Optional[np.ndarray] = None
+    ) -> np.ndarray:
+        """Fit the model to the input data and calculate the residuals.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Input data
+
+        y : array-like of shape (n_samples,), default=None
+            Target values
+
+        Returns
+        -------
+        ndarray of shape (n_samples,)
+            The residuals of the model
+        """
+        self.fit(X, y)
+        return self.predict_residuals(X, y, validate=True)
+
+    @abstractmethod
+    def predict_residuals(
+        self, X: np.ndarray, y: Optional[np.ndarray], validate: bool
+    ) -> np.ndarray:
+        """Calculate the residuals of the model.
+
+        Returns
+        -------
+        ndarray of shape (n_samples,)
+            The residuals of the model
+        """
+
+    @abstractmethod
+    def _calculate_critical_value(self, X: Optional[np.ndarray]) -> float:
+        """Calculate the critical value for outlier detection.
+
+        Returns
+        -------
+        float
+            The calculated critical value for outlier detection
+        """
+
+
+class _ModelDiagnosticsBase(ABC):
+    """Base class for model diagnostics methods. This does not implement outlier detection algorithms,
+    but rather implements methods that are used to assess trained models.
+
+    Parameters
+    ----------
+    model : Union[ModelTypes, Pipeline]
+        A fitted PCA/PLS model or Pipeline ending with such a model
+
+    Attributes
+    ----------
+    model_ : ModelTypes
+        The fitted model of type _BasePCA or _PLS
+
+    preprocessing_ : Optional[Pipeline]
+        Preprocessing steps before the model
+
+    """
+
+    def __init__(self, model: Union[ModelTypes, Pipeline]):
+        self.model_, self.preprocessing_ = self._validate_and_extract_model(model)
+
+    def _validate_and_extract_model(self, model):
+        """Validate and extract the model and preprocessing steps.
+
+        Parameters
+        ----------
+        model : Union[ModelTypes, Pipeline]
+            A fitted PCA/PLS model or Pipeline ending with such a model
+
+        Returns
+        -------
+        Tuple[ModelTypes, Optional[Pipeline]]
+            The extracted model and preprocessing steps
+
+        Raises
+        ------
+        ValueError
+            If the model is not of type _BasePCA or _PLS or a Pipeline ending with one of these types or if the model is not fitted
+        """
+        if isinstance(model, Pipeline):
+            preprocessing = model[:-1]
+            model = model[-1]
+        else:
+            preprocessing = None
+
+        if isinstance(model, (_BasePCA, _PLS)):
+            check_is_fitted(model)
+        else:
+            raise ValueError(
+                "Model not a valid model. Must be of base type _BasePCA or _PLS or a Pipeline ending with one of these types."
+            )
+        check_is_fitted(model)
+        return model, preprocessing
+
+    @abstractmethod
+    def predict(self, X: np.ndarray, y: Optional[np.ndarray]) -> np.ndarray:
+        """Predict the output of the model.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Input data
+
+        y : array-like of shape (n_samples,), default=None
+            Target values
+
+        Returns
+        -------
+        ndarray of shape (n_samples,)
+            Predicted values
+        """