From fb107c49e991fba103c9c1ed44accd31147933ac Mon Sep 17 00:00:00 2001 From: "Vladimir S. FONOV" Date: Wed, 5 Oct 2022 14:07:44 -0400 Subject: [PATCH 1/6] WIP: added OPLS algorithm, also added new convenience functions --- src/PartialLeastSquaresRegressor.jl | 1 + src/opls.jl | 47 +++++++++++++++++++++++++++++ src/pls1.jl | 20 ++++++++++++ src/types.jl | 27 +++++++++++++++++ test/_standardizer.jl | 3 +- 5 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 src/opls.jl diff --git a/src/PartialLeastSquaresRegressor.jl b/src/PartialLeastSquaresRegressor.jl index fa1591a..9cf19e9 100644 --- a/src/PartialLeastSquaresRegressor.jl +++ b/src/PartialLeastSquaresRegressor.jl @@ -9,6 +9,7 @@ include("types.jl") include("pls1.jl") include("pls2.jl") include("kpls.jl") +include("opls.jl") include("method.jl") include("mlj_interface.jl") diff --git a/src/opls.jl b/src/opls.jl new file mode 100644 index 0000000..3669501 --- /dev/null +++ b/src/opls.jl @@ -0,0 +1,47 @@ +##filtering and learning - Ortogonal PLS, one feature +function fitting(model::OPLS1Model, + X::AbstractArray{T}, + Y::Vector{T}) where T<:AbstractFloat + + w=X'*Y # calculate weight vector + w=w/norm(w) # normalization + + for i in 1:model.n_components + t=X*w # calculate scores vector nrows + p=X'*t/(t'*t) # calculate loadings of X ncols + wosc=p-(w'*p)/(w'*w)*w # orthogonal weight ncols + wosc=wosc/norm(wosc) # normalization ncols + tosc=X*wosc # orthogonal components nrows + posc=X'*tosc/(tosc'*tosc) # loadings ncols + + X=X-tosc*posc' # remove orthogonal components + + model.W_ortho[:,i]=wosc # weights orthogonal to y + model.P_ortho[:,i]=posc # loadings orthogonal to y + model.T_ortho[:,i]=tosc # scores orthogonal to y + end + + # X is now with orthogonal signal components removed + return X, model.T_ortho +end + +# remove orthogonal components from Xt +# return filtered Xt and weightings of orthogonal components +function filter!(model::OPLS1Model, + X::AbstractArray{T}) where T<:AbstractFloat + nrow = size(X,1) + # ortogonal weights + ortho = zeros(T, nrow, model.n_components) + + for i in 1:model.n_components + R = X * model.W_ortho[:,i] + Xt = X - R * model.P_ortho[:,i]' + ortho[:,i] = R + end + + return Xt, ortho +end + +function component(model::OPLS1Model{T},i) where T<:AbstractFloat + return model_pls.W[:,i] +end \ No newline at end of file diff --git a/src/pls1.jl b/src/pls1.jl index d92f1dc..84a9e34 100644 --- a/src/pls1.jl +++ b/src/pls1.jl @@ -40,3 +40,23 @@ function predictor(model::PLS1Model{T}, return Y end + +function filter!(model::PLS1Model{T}, + X::AbstractArray{T}) where T<:AbstractFloat + + nrows = size(X,1) + X_proj = zeros(T, nrows, model.nfactors) + Y_pred = zeros(size(X,1)) + + for i = 1:model.nfactors + X_proj[:,i] = X*model_pls.W[:,i] + X = X - X_proj[:,i]*model_pls.P[:,i]' + Y_pred = Y_pred + X_proj[:,i]*model_pls.b[i] # building prediction + end + + return X, X_proj, Y_pred +end + +function component(model::PLS1Model{T},i) where T<:AbstractFloat + return model_pls.W[:,i] +end \ No newline at end of file diff --git a/src/types.jl b/src/types.jl index 60d1909..f525013 100644 --- a/src/types.jl +++ b/src/types.jl @@ -93,3 +93,30 @@ function PLSModel(X::Matrix{T}, kernel, width) end + + +################################################################################ +#### OPLS1 type +mutable struct OPLS1Model{T<:AbstractFloat} <:PLSModel{T} + W_ortho::Matrix{T} # : weights orthogonal to y + P_ortho::Matrix{T} # : loadings orthogonal to y + T_ortho::Matrix{T} # : scores orthogonal to y + + n_ortho_components::Int # +end + +## OPLS1: constructor +function OPLS1Model(X::Matrix{T}, + Y::Vector{T}, + n_components::Int) where T<:AbstractFloat + + (nrows,ncols) = size(X) + + ## Allocation + return OPLS1Model( + zeros(T, ncols, n_components), ## W_ortho + zeros(T, ncols, n_components), ## P_ortho + zeros(T, nrows, n_components), ## T_ortho + n_components ## n_components + ) +end \ No newline at end of file diff --git a/test/_standardizer.jl b/test/_standardizer.jl index a0b74ee..b0824de 100644 --- a/test/_standardizer.jl +++ b/test/_standardizer.jl @@ -6,7 +6,8 @@ using MLJModelInterface using ..MLJBase.Tables const MMI = MLJModelInterface using Statistics -import ..MLJBase.MLJScientificTypes.coerce +#import ..MLJBase.MLJScientificTypes.coerce +import ScientificTypes.coerce const UNIVARIATE_STD_DESCR = "Standardize (whiten) univariate data." const STANDARDIZER_DESCR = "Standardize (whiten) features (columns) "* From 3fd793121c1154edb5eb4f2540bf49888649592b Mon Sep 17 00:00:00 2001 From: "Vladimir S. FONOV" Date: Wed, 5 Oct 2022 15:19:18 -0400 Subject: [PATCH 2/6] WIP: fixed typos --- src/opls.jl | 12 ++++++------ src/pls1.jl | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/opls.jl b/src/opls.jl index 3669501..287ce80 100644 --- a/src/opls.jl +++ b/src/opls.jl @@ -6,7 +6,7 @@ function fitting(model::OPLS1Model, w=X'*Y # calculate weight vector w=w/norm(w) # normalization - for i in 1:model.n_components + for i in 1:model.n_ortho_components t=X*w # calculate scores vector nrows p=X'*t/(t'*t) # calculate loadings of X ncols wosc=p-(w'*p)/(w'*w)*w # orthogonal weight ncols @@ -31,17 +31,17 @@ function filter!(model::OPLS1Model, X::AbstractArray{T}) where T<:AbstractFloat nrow = size(X,1) # ortogonal weights - ortho = zeros(T, nrow, model.n_components) + ortho = zeros(T, nrow, model.n_ortho_components) - for i in 1:model.n_components + for i in 1:model.n_ortho_components R = X * model.W_ortho[:,i] - Xt = X - R * model.P_ortho[:,i]' + X = X - R * model.P_ortho[:,i]' ortho[:,i] = R end - return Xt, ortho + return X, ortho end function component(model::OPLS1Model{T},i) where T<:AbstractFloat - return model_pls.W[:,i] + return model.W_ortho[:,i] end \ No newline at end of file diff --git a/src/pls1.jl b/src/pls1.jl index 84a9e34..a08f0dc 100644 --- a/src/pls1.jl +++ b/src/pls1.jl @@ -49,14 +49,14 @@ function filter!(model::PLS1Model{T}, Y_pred = zeros(size(X,1)) for i = 1:model.nfactors - X_proj[:,i] = X*model_pls.W[:,i] - X = X - X_proj[:,i]*model_pls.P[:,i]' - Y_pred = Y_pred + X_proj[:,i]*model_pls.b[i] # building prediction + X_proj[:,i] = X*model.W[:,i] + X = X - X_proj[:,i]*model.P[:,i]' + Y_pred = Y_pred + X_proj[:,i]*model.b[i] # building prediction end return X, X_proj, Y_pred end function component(model::PLS1Model{T},i) where T<:AbstractFloat - return model_pls.W[:,i] + return model.W[:,i] end \ No newline at end of file From 05a8c8c33324992b6b3b761c110ca0ecd85dfe1d Mon Sep 17 00:00:00 2001 From: "Vladimir S. FONOV" Date: Wed, 5 Oct 2022 15:44:14 -0400 Subject: [PATCH 3/6] WIP: fixing tests --- Project.toml | 8 -------- test/_standardizer.jl | 3 ++- test/pls1_test.jl | 2 +- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/Project.toml b/Project.toml index b3844bb..6ed92c3 100644 --- a/Project.toml +++ b/Project.toml @@ -14,11 +14,3 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [compat] MLJModelInterface = "0.3,0.4,1" julia = "^1" - -[extras] -MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[targets] -test = ["Test", "Random", "MLJBase"] diff --git a/test/_standardizer.jl b/test/_standardizer.jl index b0824de..6834ac8 100644 --- a/test/_standardizer.jl +++ b/test/_standardizer.jl @@ -6,8 +6,9 @@ using MLJModelInterface using ..MLJBase.Tables const MMI = MLJModelInterface using Statistics +using ScientificTypes #import ..MLJBase.MLJScientificTypes.coerce -import ScientificTypes.coerce +#import ScientificTypes.coerce const UNIVARIATE_STD_DESCR = "Standardize (whiten) univariate data." const STANDARDIZER_DESCR = "Standardize (whiten) features (columns) "* diff --git a/test/pls1_test.jl b/test/pls1_test.jl index 10d9eac..627b5ee 100644 --- a/test/pls1_test.jl +++ b/test/pls1_test.jl @@ -48,7 +48,7 @@ Y = [2; 4; 6.0] - pls_pipe = MLJBase.@pipeline prediction_type=:deterministic Stand.Standardizer() PartialLeastSquaresRegressor.PLSRegressor(n_factors=1) target = Stand.Standardizer() + pls_pipe = MLJBase.@pipeline prediction_type=:deterministic Stand.Standardizer() PartialLeastSquaresRegressor.PLSRegressor(n_factors=1) target = Stand.Standardizer() pls_machine = MLJBase.machine(pls_pipe, X, Y) From 3f6f9956661bcc96e05bfe776b7f0eca2c75b951 Mon Sep 17 00:00:00 2001 From: "Vladimir S. FONOV" Date: Wed, 5 Oct 2022 15:50:31 -0400 Subject: [PATCH 4/6] Changed dependencies on tests --- test/Project.toml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 test/Project.toml diff --git a/test/Project.toml b/test/Project.toml new file mode 100644 index 0000000..4524baa --- /dev/null +++ b/test/Project.toml @@ -0,0 +1,8 @@ +[deps] +MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" +MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81" +ScientificTypesBase = "30f210dd-8aff-4c5f-94ba-8e64358c1161" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" From 0efb26fcae15a00fa53476357fc445eb04a08973 Mon Sep 17 00:00:00 2001 From: "Vladimir S. FONOV" Date: Thu, 24 Nov 2022 07:52:03 +1030 Subject: [PATCH 5/6] Added helper functions to PLS2 interface --- src/pls2.jl | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/pls2.jl b/src/pls2.jl index df9be5f..0609178 100644 --- a/src/pls2.jl +++ b/src/pls2.jl @@ -61,3 +61,27 @@ function predictor(model::PLS2Model{T}, return Y end +function filter!(model::PLS2Model{T}, + X::AbstractArray{T}) where T<:AbstractFloat + + nrows = size(X,1) + X_proj = zeros(T, nrows, model.nfactors) + + W,Q,P = model.W,model.Q,model.P + nfactors = model.nfactors + Y = zeros(T,size(X,1),model.ntargetcols) + #println("nfactors: ",nfactors) + for i = 1:nfactors + #R = X*W[:,i] + X_proj[:,i] = X*model.W[:,i] + X = X - X_proj[:,i] * P[:,i]' + Y = Y + X_proj[:,i] * Q[:,i]' + end + + return X,X_proj,Y +end + + +function component(model::PLS2Model{T},i) where T<:AbstractFloat + return model.W[:,i] +end \ No newline at end of file From a84bd1a19b45487ae7ae29c33558bd2bfd250df8 Mon Sep 17 00:00:00 2001 From: "Vladimir S. FONOV" Date: Tue, 24 Jan 2023 17:30:42 -0500 Subject: [PATCH 6/6] Added reference to the original code and paper --- src/opls.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/opls.jl b/src/opls.jl index 287ce80..59e62d6 100644 --- a/src/opls.jl +++ b/src/opls.jl @@ -1,3 +1,9 @@ +# based on the code from https://github.com/BiRG/pyopls +# reference: Johan Trygg and Svante Wold. Orthogonal projections to latent structures (O-PLS). +# J. Chemometrics 2002; 16: 119-128. DOI: 10.1002/cem.695 + + + ##filtering and learning - Ortogonal PLS, one feature function fitting(model::OPLS1Model, X::AbstractArray{T},