1
1
from abc import ABC , abstractmethod
2
- from typing import Union , Optional
2
+ from typing import Optional , Tuple , Union
3
3
4
4
import numpy as np
5
5
9
9
from sklearn .pipeline import Pipeline
10
10
from sklearn .utils .validation import check_is_fitted
11
11
12
- from ._utils import validate_confidence , validate_and_extract_model
13
12
14
13
ModelTypes = Union [_BasePCA , _PLS ]
15
14
@@ -29,10 +28,10 @@ class _ModelResidualsBase(ABC, BaseEstimator, OutlierMixin):
29
28
30
29
Attributes
31
30
----------
32
- model_ : ModelTypes
31
+ estimator_ : ModelTypes
33
32
The fitted model of type _BasePCA or _PLS
34
33
35
- preprocessing_ : Optional[Pipeline]
34
+ transformer_ : Optional[Pipeline]
36
35
Preprocessing steps before the model
37
36
38
37
n_features_in_ : int
@@ -54,13 +53,13 @@ def __init__(
54
53
confidence : float ,
55
54
) -> None :
56
55
(
57
- self .model_ ,
58
- self .preprocessing_ ,
56
+ self .estimator_ ,
57
+ self .transformer_ ,
59
58
self .n_features_in_ ,
60
59
self .n_components_ ,
61
60
self .n_samples_ ,
62
- ) = validate_and_extract_model (model )
63
- self .confidence = validate_confidence (confidence )
61
+ ) = _validate_and_extract_model (model )
62
+ self .confidence = _validate_confidence (confidence )
64
63
65
64
def fit_predict_residuals (
66
65
self , X : np .ndarray , y : Optional [np .ndarray ] = None
@@ -96,7 +95,7 @@ def predict_residuals(
96
95
"""
97
96
98
97
@abstractmethod
99
- def _calculate_critical_value (self , X : Optional [ np .ndarray ] ) -> float :
98
+ def _calculate_critical_value (self , X : np .ndarray ) -> float :
100
99
"""Calculate the critical value for outlier detection.
101
100
102
101
Returns
@@ -106,75 +105,84 @@ def _calculate_critical_value(self, X: Optional[np.ndarray]) -> float:
106
105
"""
107
106
108
107
109
- class _ModelDiagnosticsBase ( ABC ) :
110
- """Base class for model diagnostics methods. This does not implement outlier detection algorithms,
111
- but rather implements methods that are used to assess trained models .
108
+ def _get_model_parameters ( model : ModelTypes ) -> Tuple [ int , int , int ] :
109
+ """
110
+ Get the number of features, components and samples from a fitted model of PCA or PLS type.
112
111
113
112
Parameters
114
113
----------
115
- model : Union[ModelTypes, Pipeline]
116
- A fitted PCA/PLS model or Pipeline ending with such a model
117
-
118
- Attributes
119
- ----------
120
- model_ : ModelTypes
121
- The fitted model of type _BasePCA or _PLS
122
-
123
- preprocessing_ : Optional[Pipeline]
124
- Preprocessing steps before the model
114
+ model : ModelTypes
115
+ A fitted model of type _BasePCA or _PLS
125
116
117
+ Returns
118
+ -------
119
+ Tuple[int, int, int]
120
+ The number of features, components and samples in the model
126
121
"""
122
+ if isinstance (model , _BasePCA ):
123
+ return model .n_features_in_ , model .n_components_ , model .n_samples_
124
+ elif isinstance (model , _PLS ):
125
+ return model .n_features_in_ , model .n_components , len (model .x_scores_ )
126
+ else :
127
+ raise ValueError (
128
+ "Model not a valid model. Must be of base type _BasePCA or _PLS or a Pipeline ending with one of these types."
129
+ )
127
130
128
- def __init__ (self , model : Union [ModelTypes , Pipeline ]):
129
- self .model_ , self .preprocessing_ = self ._validate_and_extract_model (model )
130
131
131
- def _validate_and_extract_model ( self , model ) :
132
- """Validate and extract the model and preprocessing steps .
132
+ def _validate_confidence ( confidence : float ) -> float :
133
+ """Validate that the confidence level lies strictly between 0 and 1.
133
134
134
- Parameters
135
- ----------
136
- model : Union[ModelTypes, Pipeline]
137
- A fitted PCA/PLS model or Pipeline ending with such a model
135
+ Parameters
136
+ ----------
137
+ confidence : float
138
+ Confidence level for statistical calculations (between 0 and 1)
138
139
139
- Returns
140
- -------
141
- Tuple[ModelTypes, Optional[Pipeline]]
142
- The extracted model and preprocessing steps
140
+ Returns
141
+ -------
142
+ float
143
+ The validated confidence level
143
144
144
- Raises
145
- ------
146
- ValueError
147
- If the model is not of type _BasePCA or _PLS or a Pipeline ending with one of these types or if the model is not fitted
148
- """
149
- if isinstance (model , Pipeline ):
150
- preprocessing = model [:- 1 ]
151
- model = model [- 1 ]
152
- else :
153
- preprocessing = None
154
-
155
- if isinstance (model , (_BasePCA , _PLS )):
156
- check_is_fitted (model )
157
- else :
158
- raise ValueError (
159
- "Model not a valid model. Must be of base type _BasePCA or _PLS or a Pipeline ending with one of these types."
160
- )
161
- check_is_fitted (model )
162
- return model , preprocessing
145
+ Raises
146
+ ------
147
+ ValueError
148
+ If confidence is not between 0 and 1
149
+ """
150
+ if not 0 < confidence < 1 :
151
+ raise ValueError ("Confidence must be between 0 and 1" )
152
+ return confidence
163
153
164
- @abstractmethod
165
- def predict (self , X : np .ndarray , y : Optional [np .ndarray ]) -> np .ndarray :
166
- """Predict the output of the model.
167
154
168
- Parameters
169
- ----------
170
- X : array-like of shape (n_samples, n_features)
171
- Input data
155
+ def _validate_and_extract_model (
156
+ model : Union [ ModelTypes , Pipeline ],
157
+ ) -> Tuple [ ModelTypes , Optional [ Pipeline ], int , int , int ]:
158
+ """Validate and extract the model and preprocessing steps.
172
159
173
- y : array-like of shape (n_samples,), default=None
174
- Target values
160
+ Parameters
161
+ ----------
162
+ model : Union[ModelTypes, Pipeline]
163
+ A fitted PCA/PLS model or Pipeline ending with such a model
175
164
176
- Returns
177
- -------
178
- ndarray of shape (n_samples,)
179
- Predicted values
180
- """
165
+ Returns
166
+ -------
167
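+ Tuple[ModelTypes, Optional[Pipeline], int, int, int]
168
+ The extracted model, the preprocessing steps (None when the input is not a Pipeline), and the number of features, components and samples of the model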
169
+
170
+ Raises
171
+ ------
172
+ ValueError
173
+ If the model is not of type _BasePCA or _PLS or a Pipeline ending with one of these types or if the model is not fitted
174
+ """
175
+ if isinstance (model , Pipeline ):
176
+ preprocessing = model [:- 1 ]
177
+ model = model [- 1 ]
178
+ else :
179
+ preprocessing = None
180
+
181
+ if not isinstance (model , (_BasePCA , _PLS )):
182
+ raise ValueError (
183
+ "Model not a valid model. Must be of base type _BasePCA or _PLS or a Pipeline ending with one of these types."
184
+ )
185
+
186
+ check_is_fitted (model )
187
+ n_features_in , n_components , n_samples = _get_model_parameters (model )
188
+ return model , preprocessing , n_features_in , n_components , n_samples
0 commit comments