Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Orange/evaluation/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,11 @@ def _collect_part_results(self, results, part_results):
results.failed[res.learner_i] = res.failed
continue

if len(res.values.shape) > 1 and res.values.shape[1] > 1:
msg = "Multiple targets are not supported."
results.failed[res.learner_i] = ValueError(msg)
continue

if self.store_models:
results.models[res.fold_i][res.learner_i] = res.model

Expand Down
1 change: 1 addition & 0 deletions Orange/regression/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from .random_forest import *
from .tree import *
from .neural_network import *
from .pls import *
from ..classification.simple_tree import *
try:
from .catgb import *
Expand Down
197 changes: 197 additions & 0 deletions Orange/regression/pls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
import numpy as np
import sklearn.cross_decomposition as skl_pls

from Orange.data import Table, Domain, Variable, \
ContinuousVariable, StringVariable
from Orange.data.util import get_unique_names, SharedComputeValue
from Orange.preprocess.score import LearnerScorer
from Orange.regression.base_regression import SklLearnerRegression, \
SklModelRegression

__all__ = ["PLSRegressionLearner"]


class _FeatureScorerMixin(LearnerScorer):
    """Score attributes by the absolute value of the fitted PLS coefficients."""

    feature_type = Variable
    class_type = ContinuousVariable

    def score(self, data):
        """Fit a model on `data`; return (|coefficients|, scored attributes)."""
        fitted = self(data)
        return np.abs(fitted.coefficients), fitted.domain.attributes


class _PLSCommonTransform:

def __init__(self, pls_model):
self.pls_model = pls_model

def _transform_with_numpy_output(self, X, Y):
"""
# the next command does the following
x_center = X - pls._x_mean
y_center = Y - pls._y_mean
t = x_center @ pls.x_rotations_
u = y_center @ pls.y_rotations_
"""
pls = self.pls_model.skl_model
t, u = pls.transform(X, Y)
return np.hstack((t, u))

def __call__(self, data):
if data.domain != self.pls_model.domain:
data = data.transform(self.pls_model.domain)
if len(data.Y.shape) == 1:
Y = data.Y.reshape(-1, 1)
else:
Y = data.Y
return self._transform_with_numpy_output(data.X, Y)

def __eq__(self, other):
if self is other:
return True
return type(self) is type(other) \
and self.pls_model == other.pls_model

def __hash__(self):
return hash(self.pls_model)


class PLSProjector(SharedComputeValue):
    """Extract one score column from the shared stacked (T, U) PLS scores."""

    def __init__(self, transform, feature):
        super().__init__(transform)
        # Column index into the matrix produced by _PLSCommonTransform.
        self.feature = feature

    def compute(self, _, shared_data):
        return shared_data[:, self.feature]

    def __eq__(self, other):
        return self is other or (super().__eq__(other)
                                 and self.feature == other.feature)

    def __hash__(self):
        return hash((super().__hash__(), self.feature))


class PLSModel(SklModelRegression):
    """Orange wrapper around a fitted scikit-learn PLS regression model."""

    var_prefix_X = "PLS T"
    var_prefix_Y = "PLS U"

    @property
    def coefficients(self):
        """Regression coefficients of the wrapped scikit-learn model."""
        return self.skl_model.coef_

    def predict(self, X):
        predictions = self.skl_model.predict(X)
        # Single-target problems expect a flat vector of predictions.
        if len(self.domain.class_vars) == 1:
            predictions = predictions.ravel()
        return predictions

    def __str__(self):
        return f"PLSModel {self.skl_model}"

    def _get_var_names(self, n, prefix):
        # Propose prefix1 .. prefixN, renamed to avoid clashes with
        # variables already present in the model's domain.
        proposed = [f"{prefix}{i}" for i in range(1, n + 1)]
        taken = [var.name for var in self.domain.metas + self.domain.variables]
        return get_unique_names(taken, proposed)

    def project(self, data):
        """Return `data` with T-scores as attributes and U-scores as metas."""
        if not isinstance(data, Table):
            raise RuntimeError("PLSModel can only project tables")

        shared = _PLSCommonTransform(self)

        def score_var(index, name):
            return ContinuousVariable(
                name, compute_value=PLSProjector(shared, index))

        n_components = self.skl_model.x_loadings_.shape[1]
        names_x = self._get_var_names(n_components, self.var_prefix_X)
        names_y = self._get_var_names(n_components, self.var_prefix_Y)

        attributes = [score_var(i, names_x[i]) for i in range(n_components)]
        # U-scores follow the T-scores in the shared matrix, hence the offset.
        metas = list(data.domain.metas) + [
            score_var(n_components + i, names_y[i])
            for i in range(n_components)]
        domain = Domain(attributes, data.domain.class_vars, metas)
        return data.transform(domain)

    def components(self):
        """Return a table of x/y loadings, one row per PLS component."""
        source = self.domain
        used = [a.name for a in source.attributes + source.class_vars]
        meta_name = get_unique_names(used, 'components')

        n_components = self.skl_model.x_loadings_.shape[1]
        row_labels = np.array(
            [[f"Component {i + 1}" for i in range(n_components)]], dtype=object
        ).T
        domain = Domain(
            [ContinuousVariable(a.name) for a in source.attributes],
            [ContinuousVariable(a.name) for a in source.class_vars],
            metas=[StringVariable(name=meta_name)])
        result = Table(domain,
                       self.skl_model.x_loadings_.T,
                       Y=self.skl_model.y_loadings_.T,
                       metas=row_labels)
        result.name = 'components'
        return result

    def coefficients_table(self):
        """Return a table of coefficients, one row per attribute."""
        coefs = self.coefficients.T
        domain = Domain(
            [ContinuousVariable(f"coef {i}") for i in range(coefs.shape[1])],
            metas=[StringVariable("name")])
        attr_names = [[attr.name] for attr in self.domain.attributes]
        result = Table.from_numpy(domain, X=coefs, metas=attr_names)
        result.name = "coefficients"
        return result


class PLSRegressionLearner(SklLearnerRegression, _FeatureScorerMixin):
    """Learner wrapping scikit-learn's PLSRegression; fits a PLSModel."""

    __wraps__ = skl_pls.PLSRegression
    __returns__ = PLSModel
    supports_multiclass = True
    preprocessors = SklLearnerRegression.preprocessors

    # pylint: disable=unused-argument
    def __init__(self, n_components=2, scale=True,
                 max_iter=500, preprocessors=None):
        super().__init__(preprocessors=preprocessors)
        self.params = vars()

    def fit(self, X, Y, W=None):
        params = self.params.copy()
        # The number of components cannot exceed either dimension of X.
        params["n_components"] = min(params["n_components"],
                                     X.shape[0] - 1,
                                     X.shape[1] - 1)
        wrapped = self.__wraps__(**params)
        return self.__returns__(wrapped.fit(X, Y))

    def incompatibility_reason(self, domain):
        """Return a message if `domain` is unusable for PLS, else None."""
        if not domain.class_vars:
            return "Numeric targets expected."
        for target in domain.class_vars:
            if not target.is_continuous:
                return "Only numeric target variables expected."
        return None


if __name__ == '__main__':
    # Smoke test: cross-validate a PLS learner on the housing dataset
    # and report per-learner RMSE.
    import Orange

    housing = Orange.data.Table('housing')
    learners = [PLSRegressionLearner(n_components=2, max_iter=100)]
    res = Orange.evaluation.CrossValidation()(housing, learners)
    for learner, ca in zip(learners, Orange.evaluation.RMSE(res)):
        print(f"learner: {learner}\nRMSE: {ca}\n")
146 changes: 146 additions & 0 deletions Orange/regression/tests/test_pls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
# pylint: disable=missing-docstring
import unittest

import numpy as np
from sklearn.cross_decomposition import PLSRegression

from Orange.data import Table, Domain, ContinuousVariable
from Orange.regression import PLSRegressionLearner
from Orange.regression.pls import _PLSCommonTransform


def table(rows, attr, variables):
    """Build a random table with `attr` features and `variables` targets.

    Fixed random seeds make the data deterministic across test runs.
    """
    domain = Domain(
        [ContinuousVariable(name=f"Feature {i}") for i in range(attr)],
        [ContinuousVariable(name=f"Class {i}") for i in range(variables)],
        [])
    X = np.random.RandomState(0).random((rows, attr))
    Y = np.random.RandomState(1).random((rows, variables))
    return Table.from_numpy(domain, X=X, Y=Y)


class TestPLSRegressionLearner(unittest.TestCase):
    """Compare the Orange PLS wrapper against scikit-learn's PLSRegression."""

    def test_allow_y_dim(self):
        """Fitting needs at least one target; 1-3 continuous targets fit fine."""
        learner = PLSRegressionLearner(n_components=2)
        d = table(10, 5, 0)
        # No target variables at all: fitting must fail.
        with self.assertRaises(ValueError):
            learner(d)
        for n_class_vars in [1, 2, 3]:
            d = table(10, 5, n_class_vars)
            learner(d)  # no exception

    def test_compare_to_sklearn(self):
        # Single-target predictions and coefficients must match sklearn's.
        d = table(10, 5, 1)
        orange_model = PLSRegressionLearner()(d)
        scikit_model = PLSRegression().fit(d.X, d.Y)
        np.testing.assert_almost_equal(scikit_model.predict(d.X).ravel(),
                                       orange_model(d))
        np.testing.assert_almost_equal(scikit_model.coef_,
                                       orange_model.coefficients)

    def test_compare_to_sklearn_multid(self):
        # Multi-target predictions and coefficients must match sklearn's.
        d = table(10, 5, 3)
        orange_model = PLSRegressionLearner()(d)
        scikit_model = PLSRegression().fit(d.X, d.Y)
        np.testing.assert_almost_equal(scikit_model.predict(d.X),
                                       orange_model(d))
        np.testing.assert_almost_equal(scikit_model.coef_,
                                       orange_model.coefficients)

    def test_too_many_components(self):
        """n_components is clipped to min(n_rows - 1, n_features - 1)."""
        # do not change n_components
        d = table(5, 5, 1)
        model = PLSRegressionLearner(n_components=4)(d)
        self.assertEqual(model.skl_model.n_components, 4)
        # need to use fewer components; column limited
        d = table(6, 5, 1)
        model = PLSRegressionLearner(n_components=6)(d)
        self.assertEqual(model.skl_model.n_components, 4)
        # need to use fewer components; row limited
        d = table(5, 6, 1)
        model = PLSRegressionLearner(n_components=6)(d)
        self.assertEqual(model.skl_model.n_components, 4)

    def test_scores(self):
        # project() must reproduce sklearn's transform: T-scores as
        # attributes (X), U-scores as metas.
        for d in [table(10, 5, 1), table(10, 5, 3)]:
            orange_model = PLSRegressionLearner()(d)
            scikit_model = PLSRegression().fit(d.X, d.Y)
            scores = orange_model.project(d)
            sx, sy = scikit_model.transform(d.X, d.Y)
            np.testing.assert_almost_equal(sx, scores.X)
            np.testing.assert_almost_equal(sy, scores.metas)

    def test_components(self):
        def t2d(m):
            # Promote a 1-d array to a column vector for uniform comparison.
            return m.reshape(-1, 1) if len(m.shape) == 1 else m

        for d in [table(10, 5, 1), table(10, 5, 3)]:
            orange_model = PLSRegressionLearner()(d)
            scikit_model = PLSRegression().fit(d.X, d.Y)
            components = orange_model.components()
            np.testing.assert_almost_equal(scikit_model.x_loadings_,
                                           components.X.T)
            np.testing.assert_almost_equal(scikit_model.y_loadings_,
                                           t2d(components.Y).T)

    def test_coefficients(self):
        # coefficients_table() rows are attributes, columns are targets.
        for d in [table(10, 5, 1), table(10, 5, 3)]:
            orange_model = PLSRegressionLearner()(d)
            scikit_model = PLSRegression().fit(d.X, d.Y)
            coef_table = orange_model.coefficients_table()
            np.testing.assert_almost_equal(scikit_model.coef_.T,
                                           coef_table.X)

    def test_eq_hash(self):
        """Projection equality/hash is keyed on the underlying PLS model."""
        data = Table("housing")
        pls1 = PLSRegressionLearner()(data)
        pls2 = PLSRegressionLearner()(data)

        proj1 = pls1.project(data)
        proj2 = pls2.project(data)

        np.testing.assert_equal(proj1.X, proj2.X)
        np.testing.assert_equal(proj1.metas, proj2.metas)

        # even though the numeric results are the same, these transformations
        # compare unequal because the underlying PLS model objects differ
        self.assertNotEqual(proj1, proj2)
        self.assertNotEqual(proj1.domain, proj2.domain)
        self.assertNotEqual(hash(proj1), hash(proj2))
        self.assertNotEqual(hash(proj1.domain), hash(proj2.domain))

        # Force both projections to share one model; they must then compare
        # equal and hash identically.
        proj2.domain[0].compute_value.compute_shared.pls_model = \
            proj1.domain[0].compute_value.compute_shared.pls_model
        # reset cached domain hashes because the objects were mutated directly
        # pylint: disable=protected-access
        proj1.domain._hash = None
        proj2.domain._hash = None

        self.assertEqual(proj1.domain, proj2.domain)
        self.assertEqual(hash(proj1.domain), hash(proj2.domain))


class TestPLSCommonTransform(unittest.TestCase):
    """Equality and hashing semantics of the shared PLS transform."""

    def test_eq(self):
        model = PLSRegressionLearner()(table(10, 5, 1))
        transform = _PLSCommonTransform(model)
        # Equal to itself and to any transform built on the same model.
        self.assertEqual(transform, transform)
        self.assertEqual(transform, _PLSCommonTransform(model))

        other = PLSRegressionLearner()(table(10, 5, 2))
        self.assertNotEqual(transform, _PLSCommonTransform(other))

    def test_hash(self):
        model = PLSRegressionLearner()(table(10, 5, 1))
        transform = _PLSCommonTransform(model)
        # Hash is stable and shared between transforms over the same model.
        self.assertEqual(hash(transform), hash(transform))
        self.assertEqual(hash(transform), hash(_PLSCommonTransform(model)))

        other = PLSRegressionLearner()(table(10, 5, 2))
        self.assertNotEqual(hash(transform), hash(_PLSCommonTransform(other)))


# Allow running this test module directly.
if __name__ == "__main__":
    unittest.main()
5 changes: 5 additions & 0 deletions Orange/widgets/evaluate/owpredictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from Orange.widgets.utils.itemmodels import TableModel
from Orange.widgets.utils.annotated_data import lazy_annotated_table, \
domain_with_annotation_column, create_annotated_table
from Orange.widgets.utils.multi_target import multiple_targets_msg
from Orange.widgets.utils.sql import check_sql_input
from Orange.widgets.utils.state_summary import format_summary_details
from Orange.widgets.utils.colorpalettes import LimitedDiscretePalette
Expand Down Expand Up @@ -423,6 +424,10 @@ def _call_predictors(self):
backmappers, n_values = predictor.get_backmappers(self.data)
prob = predictor.backmap_probs(prob, n_values, backmappers)
pred = predictor.backmap_value(pred, prob, n_values, backmappers)
if len(pred.shape) > 1 and pred.shape[1] > 1:
self.predictors[index] = \
slot._replace(results=multiple_targets_msg)
continue
results.predicted = pred.reshape((1, len(self.data)))
results.probabilities = prob.reshape((1,) + prob.shape)

Expand Down
Loading