From 6885f7c0f53f18fb772901db2e69cc31a28c14a3 Mon Sep 17 00:00:00 2001 From: MothNik Date: Mon, 20 May 2024 15:11:16 +0200 Subject: [PATCH] style: made variable names of Whittaker base more concise with special focus on single character names; unified variable names globally --- chemotools/smooth/_whittaker_smooth.py | 4 +- .../_whittaker_base/auto_lambda/logml.py | 25 +++--- .../_whittaker_base/auto_lambda/shared.py | 12 +-- chemotools/utils/_whittaker_base/main.py | 79 ++++++++++--------- chemotools/utils/_whittaker_base/misc.py | 16 ++-- chemotools/utils/_whittaker_base/solvers.py | 44 ++++++----- tests/test_for_utils/test_whittaker_base.py | 16 ++-- tests/test_functionality.py | 8 +- 8 files changed, 111 insertions(+), 93 deletions(-) diff --git a/chemotools/smooth/_whittaker_smooth.py b/chemotools/smooth/_whittaker_smooth.py index 5c9bc16..025fea6 100644 --- a/chemotools/smooth/_whittaker_smooth.py +++ b/chemotools/smooth/_whittaker_smooth.py @@ -26,13 +26,13 @@ from sklearn.base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin from sklearn.utils.validation import check_is_fitted -from chemotools.utils.check_inputs import check_input, check_weights from chemotools.utils._types import RealNumeric from chemotools.utils._whittaker_base import ( WhittakerLikeSolver, WhittakerSmoothLambda, WhittakerSmoothMethods, ) +from chemotools.utils.check_inputs import check_input, check_weights ### Main Class ### @@ -288,7 +288,7 @@ def transform( # Calculate the whittaker smooth return self._whittaker_solve( - X=X_, w=sample_weight_checked, use_same_w_for_all=use_same_w_for_all + X=X_, weights=sample_weight_checked, use_same_w_for_all=use_same_w_for_all )[0] def fit_transform( diff --git a/chemotools/utils/_whittaker_base/auto_lambda/logml.py b/chemotools/utils/_whittaker_base/auto_lambda/logml.py index 10cbe63..b8d1d25 100644 --- a/chemotools/utils/_whittaker_base/auto_lambda/logml.py +++ b/chemotools/utils/_whittaker_base/auto_lambda/logml.py @@ -30,7 +30,7 @@ def get_log_marginal_likelihood_constant_term( differences: int, penalty_mat_log_pseudo_det: float, - w: np.ndarray, + weights: np.ndarray, zero_weight_tol: float, ) -> float: """ @@ -64,9 +64,9 @@ def get_log_marginal_likelihood_constant_term( # first, the constant terms of the log marginal likelihood are computed starting # from the log pseudo-determinant of the weight matrix, i.e., the product of the # non-zero elements of the weight vector - nonzero_w_flags = w > w.max() * zero_weight_tol + nonzero_w_flags = weights > weights.max() * zero_weight_tol nnz_w = nonzero_w_flags.sum() - log_pseudo_det_w = np.log(w[nonzero_w_flags]).sum() + log_pseudo_det_w = np.log(weights[nonzero_w_flags]).sum() # the constant term of the log marginal likelihood is computed return ( @@ -82,9 +82,9 @@ def get_log_marginal_likelihood( lam: float, differences: int, diff_kernel_flipped: np.ndarray, - b: np.ndarray, - b_smooth: np.ndarray, - w: Union[float, np.ndarray], + rhs_b: np.ndarray, + rhs_b_smooth: np.ndarray, + weights: Union[float, np.ndarray], w_plus_penalty_plus_n_samples_term: float, ) -> float: """ @@ -153,13 +153,18 @@ def get_log_marginal_likelihood( """ # noqa: E501 # first, the weighted Sum of Squared Residuals is computed ... - wrss = get_smooth_wrss(b=b, b_smooth=b_smooth, w=w) + wrss = get_smooth_wrss( + rhs_b=rhs_b, + rhs_b_smooth=rhs_b_smooth, + weights=weights, + ) # ... followed by the Penalty Sum of Squares which requires the squared forward # finite differences of the smoothed series # NOTE: ``np.convolve`` is used to compute the forward finite differences and # since it flips the provided kernel, an already flipped kernel is used pss = ( - lam * np.square(np.convolve(b_smooth, diff_kernel_flipped, mode="valid")).sum() + lam + * np.square(np.convolve(rhs_b_smooth, diff_kernel_flipped, mode="valid")).sum() ) # besides the determinant of the combined left hand side matrix has to be @@ -174,7 +179,7 @@ def get_log_marginal_likelihood( return -0.5 * ( wrss + pss - - (b.size - differences) * log_lam + - (rhs_b.size - differences) * log_lam + lhs_logabsdet + w_plus_penalty_plus_n_samples_term ) @@ -183,7 +188,7 @@ def get_log_marginal_likelihood( # ill-conditioned and the log marginal likelihood cannot be computed # NOTE: since it is very hard to trigger this exception, it is not covered by the # tests - raise RuntimeError( # pragma: no cover + raise RuntimeError( # pragma: no cover "\nThe determinant of the combined left hand side matrix " "W + lambda * D.T @ D is negative, indicating that the system is extremely " "ill-conditioned.\n" diff --git a/chemotools/utils/_whittaker_base/auto_lambda/shared.py b/chemotools/utils/_whittaker_base/auto_lambda/shared.py index 362b6e0..0f95c8e 100644 --- a/chemotools/utils/_whittaker_base/auto_lambda/shared.py +++ b/chemotools/utils/_whittaker_base/auto_lambda/shared.py @@ -23,9 +23,9 @@ def get_smooth_wrss( - b: np.ndarray, - b_smooth: np.ndarray, - w: Union[float, np.ndarray], + rhs_b: np.ndarray, + rhs_b_smooth: np.ndarray, + weights: Union[float, np.ndarray], ) -> float: """ Computes the (weighted) Sum of Squared Residuals (w)RSS between the original and @@ -34,8 +34,8 @@ def get_smooth_wrss( """ # Case 1: no weights are provided - if isinstance(w, float): - return np.square(b - b_smooth).sum() + if isinstance(weights, float): + return np.square(rhs_b - rhs_b_smooth).sum() # Case 2: weights are provided - return (w * np.square(b - b_smooth)).sum() + return (weights * np.square(rhs_b - rhs_b_smooth)).sum() diff --git a/chemotools/utils/_whittaker_base/main.py b/chemotools/utils/_whittaker_base/main.py index c7aad6f..5aae7e7 100644 --- a/chemotools/utils/_whittaker_base/main.py +++ b/chemotools/utils/_whittaker_base/main.py @@ -176,8 +176,8 @@ def _setup_for_fit( def _solve( self, lam: float, - b_weighted: np.ndarray, - w: Union[float, np.ndarray], + rhs_b_weighted: np.ndarray, + weights: Union[float, np.ndarray], ) -> tuple[np.ndarray, _models.BandedSolvers, auto._Factorization]: """ Internal wrapper for the solver methods to solve the linear system of equations @@ -197,8 +197,8 @@ def _solve( differences=self.differences_, l_and_u=self._l_and_u_, penalty_mat_banded=self._penalty_mat_banded_, - b_weighted=b_weighted, - w=w, + rhs_b_weighted=rhs_b_weighted, + weights=weights, pentapy_enabled=self._pentapy_enabled_, ) @@ -207,8 +207,8 @@ def _solve( def _marginal_likelihood_objective( self, log_lam: float, - b: np.ndarray, - w: np.ndarray, + rhs_b: np.ndarray, + weights: np.ndarray, w_plus_penalty_plus_n_samples_term: float, ) -> float: """ @@ -226,8 +226,8 @@ def _marginal_likelihood_objective( # the solution of the linear system of equations is computed b_smooth, _, factorization = self._solve( lam=lam, - b_weighted=b * w, - w=w, + rhs_b_weighted=rhs_b * weights, + weights=weights, ) # finally, the log marginal likelihood is computed and returned (negative since @@ -239,9 +239,9 @@ def _marginal_likelihood_objective( lam=lam, differences=self.differences_, diff_kernel_flipped=self._diff_kernel_flipped_, - b=b, - b_smooth=b_smooth, - w=w, + rhs_b=rhs_b, + rhs_b_smooth=b_smooth, + weights=weights, w_plus_penalty_plus_n_samples_term=w_plus_penalty_plus_n_samples_term, ) @@ -249,8 +249,8 @@ def _marginal_likelihood_objective( def _solve_single_b_fixed_lam( self, - b: np.ndarray, - w: Union[float, np.ndarray], + rhs_b: np.ndarray, + weights: Union[float, np.ndarray], lam: Optional[float] = None, ) -> tuple[np.ndarray, float]: """ @@ -271,12 +271,12 @@ def _solve_single_b_fixed_lam( # the most efficient way around going into this method in the first place; # in the future this might change and thus, this case is kept for now, but # ignored for coverage - if isinstance(w, float): # pragma: no cover + if isinstance(weights, float): # pragma: no cover return ( self._solve( lam=lam, - b_weighted=b, - w=w, + rhs_b_weighted=rhs_b, + weights=weights, )[0], lam, ) @@ -285,16 +285,16 @@ def _solve_single_b_fixed_lam( return ( self._solve( lam=lam, - b_weighted=b * w, - w=w, + rhs_b_weighted=rhs_b * weights, + weights=weights, )[0], lam, ) def _solve_single_b_auto_lam_logml( self, - b: np.ndarray, - w: Union[float, np.ndarray], + rhs_b: np.ndarray, + weights: Union[float, np.ndarray], ) -> tuple[np.ndarray, float]: """ Solves for the Whittaker-like smoother solution for a single series with an @@ -305,7 +305,7 @@ def _solve_single_b_auto_lam_logml( # if the weights are not provided, the log marginal likelihood cannot be # computed - at least not in a meaningful way - if isinstance(w, (float, int)): + if isinstance(weights, (float, int)): raise ValueError( "\nAutomatic fitting of the penalty weight lambda by maximizing the " "log marginal likelihood is only possible if weights are provided.\n" @@ -316,7 +316,7 @@ def _solve_single_b_auto_lam_logml( w_plus_n_samples_term = auto.get_log_marginal_likelihood_constant_term( differences=self.differences_, penalty_mat_log_pseudo_det=self._penalty_mat_log_pseudo_det_, - w=w, + weights=weights, zero_weight_tol=self.__zero_weight_tol, ) @@ -324,17 +324,21 @@ def _solve_single_b_auto_lam_logml( opt_lambda = auto.get_optimized_lambda( fun=self._marginal_likelihood_objective, lam=self._lam_inter_, - args=(b, w, w_plus_n_samples_term), + args=(rhs_b, weights, w_plus_n_samples_term), ) # the optimal penalty weight lambda is returned together with the smoothed # series - return self._solve_single_b_fixed_lam(b=b, w=w, lam=opt_lambda) + return self._solve_single_b_fixed_lam( + rhs_b=rhs_b, + weights=weights, + lam=opt_lambda, + ) def _solve_multiple_b( self, X: np.ndarray, - w: Optional[np.ndarray], + weights: Optional[np.ndarray], ) -> tuple[np.ndarray, np.ndarray]: """ Solves for the Whittaker-like smoother solution for multiple series when the @@ -349,19 +353,19 @@ def _solve_multiple_b( # then, the solution of the linear system of equations is computed for the # transposed series matrix (expected right-hand side format for the solvers) # Case 1: no weights are provided - if w is None: + if weights is None: X_smooth, _, _ = self._solve( lam=self._lam_inter_.fixed_lambda, - b_weighted=X.transpose(), - w=1.0, + rhs_b_weighted=X.transpose(), + weights=1.0, ) # Case 2: weights are provided else: X_smooth, _, _ = self._solve( lam=self._lam_inter_.fixed_lambda, - b_weighted=(X * w).transpose(), - w=w[0, ::], + rhs_b_weighted=(X * weights).transpose(), + weights=weights[0, ::], ) return ( @@ -375,7 +379,7 @@ def _whittaker_solve( self, X: np.ndarray, *, - w: Optional[np.ndarray] = None, + weights: Optional[np.ndarray] = None, use_same_w_for_all: bool = False, ) -> tuple[np.ndarray, np.ndarray]: """ @@ -389,7 +393,7 @@ def _whittaker_solve( ---------- X : ndarray of shape (m, n) The series to be smoothed stored as individual rows. - w : ndarray of shape(1, n) or shape(m, n) or None + weights : ndarray of shape(1, n) or shape(m, n) or None The weights to be applied for smoothing. If only a single row is provided and ``use_same_w_for_all`` is ``True``, the same weights can be applied for all series in ``X``, which enhances the smoothing a lot for fixed @@ -415,7 +419,7 @@ def _whittaker_solve( # can be done more efficiently by leveraging LAPACK'S (not pentapy's) ability to # perform multiple solves from the same inversion at once if use_same_w_for_all and not self._lam_inter_.fit_auto: - return self._solve_multiple_b(X=X, w=w) + return self._solve_multiple_b(X=X, weights=weights) # otherwise, the solution of the linear system of equations is computed for # each series @@ -430,8 +434,11 @@ def _whittaker_solve( # then, the solution is computed for each series by means of a loop X_smooth = np.empty_like(X) lam = np.empty(shape=(X.shape[0],)) - w_gen = get_weight_generator(w=w, n_series=X.shape[0]) - for iter_i, (x_vect, w_vect) in enumerate(zip(X, w_gen)): - X_smooth[iter_i], lam[iter_i] = smooth_method(b=x_vect, w=w_vect) + w_gen = get_weight_generator(weights=weights, n_series=X.shape[0]) + for iter_i, (x_vect, wght) in enumerate(zip(X, w_gen)): + X_smooth[iter_i], lam[iter_i] = smooth_method( + rhs_b=x_vect, + weights=wght, + ) return X_smooth, lam diff --git a/chemotools/utils/_whittaker_base/misc.py b/chemotools/utils/_whittaker_base/misc.py index 35f8fd5..d6a191c 100644 --- a/chemotools/utils/_whittaker_base/misc.py +++ b/chemotools/utils/_whittaker_base/misc.py @@ -14,7 +14,7 @@ def get_weight_generator( - w: Any, + weights: Any, n_series: int, ) -> Generator[Union[float, np.ndarray], None, None]: """ @@ -24,25 +24,25 @@ def get_weight_generator( """ # if the weights are neither None nor a 2D-Array, an error is raised - if not (w is None or isinstance(w, np.ndarray)): + if not (weights is None or isinstance(weights, np.ndarray)): raise TypeError( f"The weights must either be None or a NumPy-2D-Array, but they are of " - f"type '{type(w)}'." + f"type '{type(weights)}'." ) # Case 1: No weights - if w is None: + if weights is None: for _ in range(n_series): yield 1.0 # Case 2: 2D weights - elif w.ndim == 2: + elif weights.ndim == 2: for idx in range(0, n_series): - yield w[idx] + yield weights[idx] # Case 3: Invalid weights - elif w.ndim != 2: + elif weights.ndim != 2: raise ValueError( f"If provided as an Array, the weights must be a 2D-Array, but they are " - f"{w.ndim}-dimensional with shape {w.shape}." + f"{weights.ndim}-dimensional with shape {weights.shape}." ) diff --git a/chemotools/utils/_whittaker_base/solvers.py b/chemotools/utils/_whittaker_base/solvers.py index 816a899..e8bd2f8 100644 --- a/chemotools/utils/_whittaker_base/solvers.py +++ b/chemotools/utils/_whittaker_base/solvers.py @@ -27,7 +27,10 @@ ### Functions ### -def solve_pentapy(a_banded: np.ndarray, b_weighted: np.ndarray) -> np.ndarray: +def solve_pentapy( + lhs_a_banded: np.ndarray, + rhs_b_weighted: np.ndarray, +) -> np.ndarray: """ Solves the linear system of equations ``(W + lam * D.T @ D) @ x = W @ b`` with the ``pentapy`` package. This is the same as solving the linear system ``A @ x = b`` @@ -41,10 +44,10 @@ def solve_pentapy(a_banded: np.ndarray, b_weighted: np.ndarray) -> np.ndarray: """ # for 1-dimensional right-hand side vectors, the solution is computed directly - if b_weighted.ndim == 1: + if rhs_b_weighted.ndim == 1: return pp.solve( - mat=a_banded, - rhs=b_weighted, + mat=lhs_a_banded, + rhs=rhs_b_weighted, is_flat=True, index_row_wise=False, solver=1, @@ -56,11 +59,11 @@ def solve_pentapy(a_banded: np.ndarray, b_weighted: np.ndarray) -> np.ndarray: # NOTE: the solutions are first written into the rows of the solution matrix # because row-access is more efficient for C-contiguous arrays; # afterwards, the solution matrix is transposed - solution = np.empty(shape=(b_weighted.shape[1], b_weighted.shape[0])) - for iter_j in range(0, b_weighted.shape[1]): + solution = np.empty(shape=(rhs_b_weighted.shape[1], rhs_b_weighted.shape[0])) + for iter_j in range(0, rhs_b_weighted.shape[1]): solution[iter_j, ::] = pp.solve( - mat=a_banded, - rhs=b_weighted[::, iter_j], + mat=lhs_a_banded, + rhs=rhs_b_weighted[::, iter_j], is_flat=True, index_row_wise=False, solver=1, @@ -71,8 +74,8 @@ def solve_pentapy(a_banded: np.ndarray, b_weighted: np.ndarray) -> np.ndarray: def solve_ppivoted_lu( l_and_u: bla.LAndUBandCounts, - a_banded: np.ndarray, - b_weighted: np.ndarray, + lhs_a_banded: np.ndarray, + rhs_b_weighted: np.ndarray, ) -> tuple[np.ndarray, _models.BandedLUFactorization]: """ Solves the linear system of equations ``(W + lam * D.T @ D) @ x = W @ b`` with a @@ -87,13 +90,13 @@ def solve_ppivoted_lu( lub_factorization = bla.lu_banded( l_and_u=l_and_u, - ab=a_banded, + ab=lhs_a_banded, check_finite=False, ) return ( bla.lu_solve_banded( lub_factorization=lub_factorization, - b=b_weighted, + b=rhs_b_weighted, check_finite=False, overwrite_b=True, ), @@ -106,8 +109,8 @@ def solve_normal_equations( differences: int, l_and_u: bla.LAndUBandCounts, penalty_mat_banded: np.ndarray, - b_weighted: np.ndarray, - w: Union[float, np.ndarray], + rhs_b_weighted: np.ndarray, + weights: Union[float, np.ndarray], pentapy_enabled: bool, ) -> tuple[np.ndarray, _models.BandedSolvers, _Factorization]: """ @@ -177,13 +180,16 @@ def solve_normal_equations( # the banded storage format for the LAPACK LU decomposition is computed by # scaling the penalty matrix with the penalty weight lambda and then adding the # diagonal matrix with the weights - a_banded = lam * penalty_mat_banded - a_banded[differences, ::] += w + lhs_a_banded = lam * penalty_mat_banded + lhs_a_banded[differences, ::] += weights # the linear system of equations is solved with the most efficient method # Case 1: Pentapy can be used if pentapy_enabled: - x = solve_pentapy(a_banded=a_banded, b_weighted=b_weighted) + x = solve_pentapy( + lhs_a_banded=lhs_a_banded, + rhs_b_weighted=rhs_b_weighted, + ) if np.isfinite(x).all(): return ( x, @@ -195,8 +201,8 @@ def solve_normal_equations( try: x, lub_factorization = solve_ppivoted_lu( l_and_u=l_and_u, - a_banded=a_banded, - b_weighted=b_weighted, + lhs_a_banded=lhs_a_banded, + rhs_b_weighted=rhs_b_weighted, ) return ( x, diff --git a/tests/test_for_utils/test_whittaker_base.py b/tests/test_for_utils/test_whittaker_base.py index c4d1da2..0ee0412 100644 --- a/tests/test_for_utils/test_whittaker_base.py +++ b/tests/test_for_utils/test_whittaker_base.py @@ -252,7 +252,7 @@ def test_weight_generator( # if the expected output is an exception, the test is run in a context manager if not isinstance(expected_output, (np.ndarray, float, int)): with pytest.raises(expected_output): - for _ in get_weight_generator(w=weights, n_series=n_series): + for _ in get_weight_generator(weights=weights, n_series=n_series): pass return @@ -260,14 +260,14 @@ def test_weight_generator( # otherwise, the output is compared to the expected output # Case 1: the expected output is a scalar if isinstance(expected_output, (float, int)): - for w in get_weight_generator(w=weights, n_series=n_series): + for w in get_weight_generator(weights=weights, n_series=n_series): assert isinstance(w, (float, int)) assert w == expected_output return # Case 2: the expected output is an array - for w in get_weight_generator(w=weights, n_series=n_series): + for w in get_weight_generator(weights=weights, n_series=n_series): assert isinstance(w, np.ndarray) assert np.array_equal(w, expected_output) @@ -302,7 +302,7 @@ def test_smooth_wrss(combination: Tuple[bool, float]) -> None: ) # the wrss is calculated ... - wrss = get_smooth_wrss(b=a_series, b_smooth=b_series, w=weights) + wrss = get_smooth_wrss(rhs_b=a_series, rhs_b_smooth=b_series, weights=weights) # ... and compared to the expected value with a very strict tolerance assert np.isclose(wrss, wrss_expected, atol=1e-13, rtol=0.0) @@ -404,8 +404,8 @@ def test_normal_condition_solve_breaks_ill_conditioned(with_pentapy: bool) -> No differences=differences, l_and_u=(differences, differences), penalty_mat_banded=a_banded, - b_weighted=b_vect, - w=weights, + rhs_b_weighted=b_vect, + weights=weights, pentapy_enabled=with_pentapy, ) @@ -465,7 +465,7 @@ def test_auto_lambda_log_marginal_likelihood_refuses_no_weights( with pytest.raises(ValueError): whitt_base._whittaker_solve( X=X, - w=None, + weights=None, use_same_w_for_all=same_weights_for_all, ) @@ -539,7 +539,7 @@ def test_auto_lambda_log_marginal_likelihood( ) _, lambda_opts = whitt_base._whittaker_solve( X=X, - w=weights, + weights=weights, use_same_w_for_all=same_weights_for_all, ) diff --git a/tests/test_functionality.py b/tests/test_functionality.py index 3523443..b3f05d4 100644 --- a/tests/test_functionality.py +++ b/tests/test_functionality.py @@ -862,8 +862,8 @@ def test_whittaker_with_pentapy( # NOTE: the weight is not correct since the test only checks the method solve_method = whittaker_smooth._solve( lam=whittaker_smooth._lam_inter_.fixed_lambda, - b_weighted=spectrum.transpose(), - w=1.0, + rhs_b_weighted=spectrum.transpose(), + weights=1.0, )[1] assert solve_method == BandedSolvers.PENTAPY @@ -877,8 +877,8 @@ def test_whittaker_with_pentapy( # NOTE: the weight is not correct since the test only checks the method solve_method = whittaker_smooth._solve( lam=whittaker_smooth._lam_inter_.fixed_lambda, - b_weighted=spectrum.transpose(), - w=1.0, + rhs_b_weighted=spectrum.transpose(), + weights=1.0, )[1] assert solve_method == BandedSolvers.PIVOTED_LU assert np.allclose(spectrum_corr_pentapy[0], spectrum_corr_factorized_solve[0])