diff --git a/Orange/evaluation/testing.py b/Orange/evaluation/testing.py
index 34a798241b6..d4070120604 100644
--- a/Orange/evaluation/testing.py
+++ b/Orange/evaluation/testing.py
@@ -541,6 +541,11 @@ def _collect_part_results(self, results, part_results):
                 results.failed[res.learner_i] = res.failed
                 continue
 
+            if len(res.values.shape) > 1 and res.values.shape[1] > 1:
+                msg = "Multiple targets are not supported."
+                results.failed[res.learner_i] = ValueError(msg)
+                continue
+
             if self.store_models:
                 results.models[res.fold_i][res.learner_i] = res.model
 
diff --git a/Orange/regression/__init__.py b/Orange/regression/__init__.py
index 62d24249f4e..74d68ed6a38 100644
--- a/Orange/regression/__init__.py
+++ b/Orange/regression/__init__.py
@@ -13,6 +13,7 @@
 from .random_forest import *
 from .tree import *
 from .neural_network import *
+from .pls import *
 from ..classification.simple_tree import *
 try:
     from .catgb import *
diff --git a/Orange/regression/pls.py b/Orange/regression/pls.py
new file mode 100644
index 00000000000..2d69283ecc3
--- /dev/null
+++ b/Orange/regression/pls.py
@@ -0,0 +1,197 @@
+import numpy as np
+import sklearn.cross_decomposition as skl_pls
+
+from Orange.data import Table, Domain, Variable, \
+    ContinuousVariable, StringVariable
+from Orange.data.util import get_unique_names, SharedComputeValue
+from Orange.preprocess.score import LearnerScorer
+from Orange.regression.base_regression import SklLearnerRegression, \
+    SklModelRegression
+
+__all__ = ["PLSRegressionLearner"]
+
+
+class _FeatureScorerMixin(LearnerScorer):
+    feature_type = Variable
+    class_type = ContinuousVariable
+
+    def score(self, data):
+        model = self(data)
+        return np.abs(model.coefficients), model.domain.attributes
+
+
+class _PLSCommonTransform:
+
+    def __init__(self, pls_model):
+        self.pls_model = pls_model
+
+    def _transform_with_numpy_output(self, X, Y):
+        """
+        pls.transform(X, Y) below is equivalent to:
+            x_center = X - pls._x_mean
+            y_center = Y - pls._y_mean
+            t = x_center @ pls.x_rotations_
+            u = y_center @ pls.y_rotations_
+        """
+        pls = self.pls_model.skl_model
+        t, u = pls.transform(X, Y)
+        return np.hstack((t, u))
+
+    def __call__(self, data):
+        if data.domain != self.pls_model.domain:
+            data = data.transform(self.pls_model.domain)
+        if len(data.Y.shape) == 1:
+            Y = data.Y.reshape(-1, 1)
+        else:
+            Y = data.Y
+        return self._transform_with_numpy_output(data.X, Y)
+
+    def __eq__(self, other):
+        if self is other:
+            return True
+        return type(self) is type(other) \
+            and self.pls_model == other.pls_model
+
+    def __hash__(self):
+        return hash(self.pls_model)
+
+
+class PLSProjector(SharedComputeValue):
+    def __init__(self, transform, feature):
+        super().__init__(transform)
+        self.feature = feature
+
+    def compute(self, _, shared_data):
+        return shared_data[:, self.feature]
+
+    def __eq__(self, other):
+        if self is other:
+            return True
+        return super().__eq__(other) and self.feature == other.feature
+
+    def __hash__(self):
+        return hash((super().__hash__(), self.feature))
+
+
+class PLSModel(SklModelRegression):
+    var_prefix_X = "PLS T"
+    var_prefix_Y = "PLS U"
+
+    @property
+    def coefficients(self):
+        return self.skl_model.coef_
+
+    def predict(self, X):
+        vals = self.skl_model.predict(X)
+        if len(self.domain.class_vars) == 1:
+            vals = vals.ravel()
+        return vals
+
+    def __str__(self):
+        return f"PLSModel {self.skl_model}"
+
+    def _get_var_names(self, n, prefix):
+        proposed = [f"{prefix}{postfix}" for postfix in range(1, n + 1)]
+        names = [var.name for var in self.domain.metas + self.domain.variables]
+        return get_unique_names(names, proposed)
+
+    def project(self, data):
+        if not isinstance(data, Table):
+            raise RuntimeError("PLSModel can only project tables")
+
+        transformer = _PLSCommonTransform(self)
+
+        def trvar(i, name):
+            return ContinuousVariable(name,
+                                      compute_value=PLSProjector(transformer,
+                                                                 i))
+
+        n_components = self.skl_model.x_loadings_.shape[1]
+
+        var_names_X = self._get_var_names(n_components, self.var_prefix_X)
+        var_names_Y = self._get_var_names(n_components, self.var_prefix_Y)
+
+        domain = Domain(
+            [trvar(i, var_names_X[i]) for i in range(n_components)],
+            data.domain.class_vars,
+            list(data.domain.metas) +
+            [trvar(n_components + i, var_names_Y[i]) for i in
+             range(n_components)]
+        )
+
+        return data.transform(domain)
+
+    def components(self):
+        orig_domain = self.domain
+        names = [a.name for a in
+                 orig_domain.attributes + orig_domain.class_vars]
+        meta_name = get_unique_names(names, 'components')
+
+        n_components = self.skl_model.x_loadings_.shape[1]
+
+        meta_vars = [StringVariable(name=meta_name)]
+        metas = np.array(
+            [[f"Component {i + 1}" for i in range(n_components)]], dtype=object
+        ).T
+        dom = Domain(
+            [ContinuousVariable(a.name) for a in orig_domain.attributes],
+            [ContinuousVariable(a.name) for a in orig_domain.class_vars],
+            metas=meta_vars)
+        components = Table(dom,
+                           self.skl_model.x_loadings_.T,
+                           Y=self.skl_model.y_loadings_.T,
+                           metas=metas)
+        components.name = 'components'
+        return components
+
+    def coefficients_table(self):
+        coeffs = self.coefficients.T
+        domain = Domain(
+            [ContinuousVariable(f"coef {i}") for i in range(coeffs.shape[1])],
+            metas=[StringVariable("name")]
+        )
+        waves = [[attr.name] for attr in self.domain.attributes]
+        coef_table = Table.from_numpy(domain, X=coeffs, metas=waves)
+        coef_table.name = "coefficients"
+        return coef_table
+
+
+class PLSRegressionLearner(SklLearnerRegression, _FeatureScorerMixin):
+    __wraps__ = skl_pls.PLSRegression
+    __returns__ = PLSModel
+    supports_multiclass = True
+    preprocessors = SklLearnerRegression.preprocessors
+
+    def fit(self, X, Y, W=None):
+        params = self.params.copy()
+        params["n_components"] = min(X.shape[1] - 1,
+                                     X.shape[0] - 1,
+                                     params["n_components"])
+        clf = self.__wraps__(**params)
+        return self.__returns__(clf.fit(X, Y))
+
+    # pylint: disable=unused-argument
+    def __init__(self, n_components=2, scale=True,
+                 max_iter=500, preprocessors=None):
+        super().__init__(preprocessors=preprocessors)
+        self.params = vars()
+
+    def incompatibility_reason(self, domain):
+        reason = None
+        if not domain.class_vars:
+            reason = "Numeric targets expected."
+        else:
+            for cv in domain.class_vars:
+                if not cv.is_continuous:
+                    reason = "Only numeric target variables expected."
+        return reason
+
+
+if __name__ == '__main__':
+    import Orange
+
+    housing = Orange.data.Table('housing')
+    learners = [PLSRegressionLearner(n_components=2, max_iter=100)]
+    res = Orange.evaluation.CrossValidation()(housing, learners)
+    for learner, ca in zip(learners, Orange.evaluation.RMSE(res)):
+        print(f"learner: {learner}\nRMSE: {ca}\n")
diff --git a/Orange/regression/tests/test_pls.py b/Orange/regression/tests/test_pls.py
new file mode 100644
index 00000000000..c6bb237ca85
--- /dev/null
+++ b/Orange/regression/tests/test_pls.py
@@ -0,0 +1,146 @@
+# pylint: disable=missing-docstring
+import unittest
+
+import numpy as np
+from sklearn.cross_decomposition import PLSRegression
+
+from Orange.data import Table, Domain, ContinuousVariable
+from Orange.regression import PLSRegressionLearner
+from Orange.regression.pls import _PLSCommonTransform
+
+
+def table(rows, attr, variables):
+    attr_vars = [ContinuousVariable(name=f"Feature {i}") for i in
+                 range(attr)]
+    class_vars = [ContinuousVariable(name=f"Class {i}") for i in
+                  range(variables)]
+    domain = Domain(attr_vars, class_vars, [])
+    X = np.random.RandomState(0).random((rows, attr))
+    Y = np.random.RandomState(1).random((rows, variables))
+    return Table.from_numpy(domain, X=X, Y=Y)
+
+
+class TestPLSRegressionLearner(unittest.TestCase):
+    def test_allow_y_dim(self):
+        """ PLS requires at least one and accepts several numeric targets. """
+        learner = PLSRegressionLearner(n_components=2)
+        d = table(10, 5, 0)
+        with self.assertRaises(ValueError):
+            learner(d)
+        for n_class_vars in [1, 2, 3]:
+            d = table(10, 5, n_class_vars)
+            learner(d)  # no exception
+
+    def test_compare_to_sklearn(self):
+        d = table(10, 5, 1)
+        orange_model = PLSRegressionLearner()(d)
+        scikit_model = PLSRegression().fit(d.X, d.Y)
+        np.testing.assert_almost_equal(scikit_model.predict(d.X).ravel(),
+                                       orange_model(d))
+        np.testing.assert_almost_equal(scikit_model.coef_,
+                                       orange_model.coefficients)
+
+    def test_compare_to_sklearn_multid(self):
+        d = table(10, 5, 3)
+        orange_model = PLSRegressionLearner()(d)
+        scikit_model = PLSRegression().fit(d.X, d.Y)
+        np.testing.assert_almost_equal(scikit_model.predict(d.X),
+                                       orange_model(d))
+        np.testing.assert_almost_equal(scikit_model.coef_,
+                                       orange_model.coefficients)
+
+    def test_too_many_components(self):
+        # do not change n_components
+        d = table(5, 5, 1)
+        model = PLSRegressionLearner(n_components=4)(d)
+        self.assertEqual(model.skl_model.n_components, 4)
+        # need to use fewer components; column limited
+        d = table(6, 5, 1)
+        model = PLSRegressionLearner(n_components=6)(d)
+        self.assertEqual(model.skl_model.n_components, 4)
+        # need to use fewer components; row limited
+        d = table(5, 6, 1)
+        model = PLSRegressionLearner(n_components=6)(d)
+        self.assertEqual(model.skl_model.n_components, 4)
+
+    def test_scores(self):
+        for d in [table(10, 5, 1), table(10, 5, 3)]:
+            orange_model = PLSRegressionLearner()(d)
+            scikit_model = PLSRegression().fit(d.X, d.Y)
+            scores = orange_model.project(d)
+            sx, sy = scikit_model.transform(d.X, d.Y)
+            np.testing.assert_almost_equal(sx, scores.X)
+            np.testing.assert_almost_equal(sy, scores.metas)
+
+    def test_components(self):
+        def t2d(m):
+            return m.reshape(-1, 1) if len(m.shape) == 1 else m
+
+        for d in [table(10, 5, 1), table(10, 5, 3)]:
+            orange_model = PLSRegressionLearner()(d)
+            scikit_model = PLSRegression().fit(d.X, d.Y)
+            components = orange_model.components()
+            np.testing.assert_almost_equal(scikit_model.x_loadings_,
+                                           components.X.T)
+            np.testing.assert_almost_equal(scikit_model.y_loadings_,
+                                           t2d(components.Y).T)
+
+    def test_coefficients(self):
+        for d in [table(10, 5, 1), table(10, 5, 3)]:
+            orange_model = PLSRegressionLearner()(d)
+            scikit_model = PLSRegression().fit(d.X, d.Y)
+            coef_table = orange_model.coefficients_table()
+            np.testing.assert_almost_equal(scikit_model.coef_.T,
+                                           coef_table.X)
+
+    def test_eq_hash(self):
+        data = Table("housing")
+        pls1 = PLSRegressionLearner()(data)
+        pls2 = PLSRegressionLearner()(data)
+
+        proj1 = pls1.project(data)
+        proj2 = pls2.project(data)
+
+        np.testing.assert_equal(proj1.X, proj2.X)
+        np.testing.assert_equal(proj1.metas, proj2.metas)
+
+        # even though the results are the same, these transformations
+        # are different because the underlying PLS objects are
+        self.assertNotEqual(proj1, proj2)
+        self.assertNotEqual(proj1.domain, proj2.domain)
+        self.assertNotEqual(hash(proj1), hash(proj2))
+        self.assertNotEqual(hash(proj1.domain), hash(proj2.domain))
+
+        proj2.domain[0].compute_value.compute_shared.pls_model = \
+            proj1.domain[0].compute_value.compute_shared.pls_model
+        # reset hash caches because the objects were hacked
+        # pylint: disable=protected-access
+        proj1.domain._hash = None
+        proj2.domain._hash = None
+
+        self.assertEqual(proj1.domain, proj2.domain)
+        self.assertEqual(hash(proj1.domain), hash(proj2.domain))
+
+
+class TestPLSCommonTransform(unittest.TestCase):
+    def test_eq(self):
+        m = PLSRegressionLearner()(table(10, 5, 1))
+        transformer = _PLSCommonTransform(m)
+        self.assertEqual(transformer, transformer)
+        self.assertEqual(transformer, _PLSCommonTransform(m))
+
+        m = PLSRegressionLearner()(table(10, 5, 2))
+        self.assertNotEqual(transformer, _PLSCommonTransform(m))
+
+    def test_hash(self):
+        m = PLSRegressionLearner()(table(10, 5, 1))
+        transformer = _PLSCommonTransform(m)
+        self.assertEqual(hash(transformer), hash(transformer))
+        self.assertEqual(hash(transformer), hash(_PLSCommonTransform(m)))
+
+        m = PLSRegressionLearner()(table(10, 5, 2))
+        self.assertNotEqual(hash(transformer), hash(_PLSCommonTransform(m)))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/Orange/widgets/evaluate/owpredictions.py b/Orange/widgets/evaluate/owpredictions.py
index fcf8ae1dbbc..edbc3ad476c 100644
--- a/Orange/widgets/evaluate/owpredictions.py
+++ b/Orange/widgets/evaluate/owpredictions.py
@@ -34,6 +34,7 @@
 from Orange.widgets.utils.itemmodels import TableModel
 from Orange.widgets.utils.annotated_data import lazy_annotated_table, \
     domain_with_annotation_column, create_annotated_table
+from Orange.widgets.utils.multi_target import multiple_targets_msg
 from Orange.widgets.utils.sql import check_sql_input
 from Orange.widgets.utils.state_summary import format_summary_details
 from Orange.widgets.utils.colorpalettes import LimitedDiscretePalette
@@ -423,6 +424,10 @@ def _call_predictors(self):
                 backmappers, n_values = predictor.get_backmappers(self.data)
                 prob = predictor.backmap_probs(prob, n_values, backmappers)
                 pred = predictor.backmap_value(pred, prob, n_values, backmappers)
+            if len(pred.shape) > 1 and pred.shape[1] > 1:
+                self.predictors[index] = \
+                    slot._replace(results=multiple_targets_msg)
+                continue
             results.predicted = pred.reshape((1, len(self.data)))
             results.probabilities = prob.reshape((1,) + prob.shape)
diff --git a/Orange/widgets/evaluate/tests/test_owpredictions.py b/Orange/widgets/evaluate/tests/test_owpredictions.py
index cb26a10edd0..eacccd51d22 100644
--- a/Orange/widgets/evaluate/tests/test_owpredictions.py
+++ b/Orange/widgets/evaluate/tests/test_owpredictions.py
@@ -1,5 +1,5 @@
 """Tests for OWPredictions"""
-# pylint: disable=protected-access
+# pylint: disable=protected-access,too-many-lines,too-many-public-methods
 import os
 import unittest
 from functools import partial
@@ -18,7 +18,8 @@
 from Orange.data.io import TabReader
 from Orange.evaluation.scoring import TargetScore
 from Orange.preprocess import Remove
-from Orange.regression import LinearRegressionLearner, MeanLearner
+from Orange.regression import LinearRegressionLearner, MeanLearner, \
+    PLSRegressionLearner
 from Orange.widgets.tests.base import WidgetTest, GuiTest
 from Orange.widgets.evaluate.owpredictions import (
     OWPredictions, SharedSelectionModel, SharedSelectionStore, DataModel,
@@ -1319,6 +1320,28 @@ def test_output_error_cls(self):
         self.assertTrue(np.isnan(pred.metas[1, 4]))
         self.assertTrue(np.isnan(pred.metas[1, 9]))
 
+    def test_multiple_targets_pls(self):
+        class_vars = [self.housing.domain.class_var,
+                      self.housing.domain.attributes[0]]
+        domain = Domain(self.housing.domain.attributes[1:],
+                        class_vars=class_vars)
+        multiple_targets_data = self.housing.transform(domain)
+
+        self.send_signal(self.widget.Inputs.data, multiple_targets_data)
+        self.send_signal(self.widget.Inputs.predictors,
+                         PLSRegressionLearner()(multiple_targets_data))
+        self.assertTrue(self.widget.Error.predictor_failed.is_shown())
+        self.assertIn("Multiple targets are not supported.",
+                      str(self.widget.Error.predictor_failed))
+
+        self.send_signal(self.widget.Inputs.data, None)
+        self.send_signal(self.widget.Inputs.predictors, None)
+
+        self.send_signal(self.widget.Inputs.data, self.housing)
+        self.send_signal(self.widget.Inputs.predictors,
+                         PLSRegressionLearner()(self.housing))
+        self.assertFalse(self.widget.Error.predictor_failed.is_shown())
+
 
 class SelectionModelTest(unittest.TestCase):
     def setUp(self):
diff --git a/Orange/widgets/evaluate/tests/test_owtestandscore.py b/Orange/widgets/evaluate/tests/test_owtestandscore.py
index 0d2fd2c4a9b..3bc54c780fc 100644
--- a/Orange/widgets/evaluate/tests/test_owtestandscore.py
+++ b/Orange/widgets/evaluate/tests/test_owtestandscore.py
@@ -18,7 +18,7 @@
     Score
 from Orange.base import Learner, Model
 from Orange.modelling import ConstantLearner
-from Orange.regression import MeanLearner
+from Orange.regression import MeanLearner, PLSRegressionLearner
 from Orange.widgets.evaluate.owtestandscore import (
     OWTestAndScore, results_one_vs_rest)
 from Orange.widgets.settings import (
@@ -753,7 +753,7 @@ def compute_score(self, results):
         ])
         data = Table.from_list(domain, [[1, 5, 0], [2, 10, 1], [2, 10, 1]])
 
-        mock_model = Mock(spec=Model, return_value=np.asarray([[0.2, 0.1, 0.2]]))
+        mock_model = Mock(spec=Model, return_value=np.asarray([0.2, 0.1, 0.2]))
         mock_model.name = 'Mockery'
         mock_model.domain = domain
         mock_learner = Mock(spec=Learner, return_value=mock_model)
@@ -769,6 +769,19 @@ def compute_score(self, results):
         self.assertTrue(NewScorer in widget.scorers)
         self.assertTrue(len(widget._successful_slots()) == 1)
 
+    def test_multiple_targets_pls(self):
+        housing = Table("housing")
+        class_vars = [housing.domain.class_var, housing.domain.attributes[0]]
+        domain = Domain(housing.domain.attributes[1:], class_vars=class_vars)
+        multiple_targets_data = housing.transform(domain)
+
+        self.widget.error = Mock()
+        self.send_signal(self.widget.Inputs.train_data, multiple_targets_data)
+        self.send_signal(self.widget.Inputs.learner, PLSRegressionLearner())
+        self.wait_until_finished()
+        self.assertIn("Multiple targets are not supported.",
+                      self.widget.error.call_args[0][0])
+
 
 class TestHelpers(unittest.TestCase):
     def test_results_one_vs_rest(self):
diff --git a/Orange/widgets/model/icons/PLS.svg b/Orange/widgets/model/icons/PLS.svg
new file mode 100644
index 00000000000..84a1f070ed9
--- /dev/null
+++ b/Orange/widgets/model/icons/PLS.svg
@@ -0,0 +1,311 @@
+<!-- [SVG markup for the PLS widget icon ("PLS" lettering, 311 lines) omitted from this excerpt] -->
diff --git a/Orange/widgets/model/owpls.py b/Orange/widgets/model/owpls.py
new file mode 100644
index 00000000000..1a74e7198ef
--- /dev/null
+++ b/Orange/widgets/model/owpls.py
@@ -0,0 +1,101 @@
+from AnyQt.QtCore import Qt
+import scipy.sparse as sp
+
+from Orange.data import Table, Domain
+from Orange.regression import PLSRegressionLearner
+from Orange.widgets import gui
+from Orange.widgets.settings import Setting
+from Orange.widgets.utils.owlearnerwidget import OWBaseLearner
+from Orange.widgets.utils.signals import Output
+from Orange.widgets.utils.widgetpreview import WidgetPreview
+from Orange.widgets.widget import Msg
+
+
+class OWPLS(OWBaseLearner):
+    name = 'PLS'
+    description = "Partial Least Squares Regression widget for multivariate data analysis"
+    icon = "icons/PLS.svg"
+    priority = 85
+    keywords = ["partial least squares"]
+
+    LEARNER = PLSRegressionLearner
+
+    class Outputs(OWBaseLearner.Outputs):
+        coefsdata = Output("Coefficients", Table, explicit=True)
+        data = Output("Data", Table)
+        components = Output("Components", Table)
+
+    class Warning(OWBaseLearner.Warning):
+        sparse_data = Msg(
+            'Sparse input data: default preprocessing is to scale it.')
+
+    n_components = Setting(2)
+    max_iter = Setting(500)
+
+    def add_main_layout(self):
+        optimization_box = gui.vBox(
+            self.controlArea, "Optimization Parameters")
+        gui.spin(
+            optimization_box, self, "n_components", 1, 50, 1,
+            label="Components: ",
+            alignment=Qt.AlignRight, controlWidth=100,
+            callback=self.settings_changed)
+        gui.spin(
+            optimization_box, self, "max_iter", 5, 1000000, 50,
+            label="Iteration limit: ",
+            alignment=Qt.AlignRight, controlWidth=100,
+            callback=self.settings_changed,
+            checkCallback=self.settings_changed)
+
+    def update_model(self):
+        super().update_model()
+        coef_table = None
+        data = None
+        components = None
+        if self.model is not None:
+            coef_table = self.model.coefficients_table()
+            data = self._create_output_data()
+            components = self.model.components()
+        self.Outputs.coefsdata.send(coef_table)
+        self.Outputs.data.send(data)
+        self.Outputs.components.send(components)
+
+    def _create_output_data(self) -> Table:
+        projection = self.model.project(self.data)
+        data_domain = self.data.domain
+        proj_domain = projection.domain
+        metas = proj_domain.metas + proj_domain.attributes
+        domain = Domain(data_domain.attributes, data_domain.class_vars, metas)
+        return self.data.transform(domain)
+
+    @OWBaseLearner.Inputs.data
+    def set_data(self, data):
+        # reimplemented completely because the base learner does not
+        # allow multiclass
+
+        self.Warning.sparse_data.clear()
+
+        self.Error.data_error.clear()
+        self.data = data
+
+        if data is not None and data.domain.class_var is None and not data.domain.class_vars:
+            self.Error.data_error(
+                "Data has no target variable.\n"
+                "Select one with the Select Columns widget.")
+            self.data = None
+
+        # invalidate the model so that handleNewSignals will update it
+        self.model = None
+
+        if self.data and sp.issparse(self.data.X):
+            self.Warning.sparse_data()
+
+    def create_learner(self):
+        common_args = {'preprocessors': self.preprocessors}
+        return PLSRegressionLearner(n_components=self.n_components,
+                                    max_iter=self.max_iter,
+                                    **common_args)
+
+
+if __name__ == "__main__":  # pragma: no cover
+    WidgetPreview(OWPLS).run(Table("housing"))
diff --git a/Orange/widgets/model/tests/test_owpls.py b/Orange/widgets/model/tests/test_owpls.py
new file mode 100644
index 00000000000..9c94e48a27e
--- /dev/null
+++ b/Orange/widgets/model/tests/test_owpls.py
@@ -0,0 +1,76 @@
+import unittest
+
+from Orange.data import Table, Domain, StringVariable
+from Orange.widgets.model.owpls import OWPLS
+from Orange.widgets.tests.base import WidgetTest, WidgetLearnerTestMixin, \
+    ParameterMapping
+
+
+class TestOWPLS(WidgetTest, WidgetLearnerTestMixin):
+    @classmethod
+    def setUpClass(cls):
+        super().setUpClass()
+        cls._data = Table("housing")
+        cls._data = cls._data.add_column(StringVariable("Foo"),
+                                         ["Bar"] * len(cls._data),
+                                         to_metas=True)
+        class_vars = [cls._data.domain.class_var,
+                      cls._data.domain.attributes[0]]
+        domain = Domain(cls._data.domain.attributes[1:], class_vars,
+                        cls._data.domain.metas)
+        cls._data_multi_target = cls._data.transform(domain)
+
+    def setUp(self):
+        self.widget = self.create_widget(OWPLS,
+                                         stored_settings={"auto_apply": False})
+        self.init()
+        self.parameters = [
+            ParameterMapping('max_iter', self.widget.controls.max_iter),
+            ParameterMapping('n_components', self.widget.controls.n_components)
+        ]
+
+    def test_output_coefsdata(self):
+        self.send_signal(self.widget.Inputs.data, self._data)
+        coefsdata = self.get_output(self.widget.Outputs.coefsdata)
+        self.assertEqual(coefsdata.X.shape, (13, 1))
+        self.assertEqual(coefsdata.Y.shape, (13, 0))
+        self.assertEqual(coefsdata.metas.shape, (13, 1))
+
+    def test_output_coefsdata_multi_target(self):
+        self.send_signal(self.widget.Inputs.data, self._data_multi_target)
+        coefsdata = self.get_output(self.widget.Outputs.coefsdata)
+        self.assertEqual(coefsdata.X.shape, (12, 2))
+        self.assertEqual(coefsdata.Y.shape, (12, 0))
+        self.assertEqual(coefsdata.metas.shape, (12, 1))
+
+    def test_output_data(self):
+        self.send_signal(self.widget.Inputs.data, self._data)
+        output = self.get_output(self.widget.Outputs.data)
+        self.assertEqual(output.X.shape, (506, 13))
+        self.assertEqual(output.Y.shape, (506,))
+        self.assertEqual(output.metas.shape, (506, 5))
+
+    def test_output_data_multi_target(self):
+        self.send_signal(self.widget.Inputs.data, self._data_multi_target)
+        output = self.get_output(self.widget.Outputs.data)
+        self.assertEqual(output.X.shape, (506, 12))
+        self.assertEqual(output.Y.shape, (506, 2))
+        self.assertEqual(output.metas.shape, (506, 5))
+
+    def test_output_components(self):
+        self.send_signal(self.widget.Inputs.data, self._data)
+        components = self.get_output(self.widget.Outputs.components)
+        self.assertEqual(components.X.shape, (2, 13))
+        self.assertEqual(components.Y.shape, (2,))
+        self.assertEqual(components.metas.shape, (2, 1))
+
+    def test_output_components_multi_target(self):
+        self.send_signal(self.widget.Inputs.data, self._data_multi_target)
+        components = self.get_output(self.widget.Outputs.components)
+        self.assertEqual(components.X.shape, (2, 12))
+        self.assertEqual(components.Y.shape, (2, 2))
+        self.assertEqual(components.metas.shape, (2, 1))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/Orange/widgets/utils/multi_target.py b/Orange/widgets/utils/multi_target.py
new file mode 100644
index 00000000000..831b44309ae
--- /dev/null
+++ b/Orange/widgets/utils/multi_target.py
@@ -0,0 +1,28 @@
+from functools import wraps
+
+from Orange.widgets.utils.messages import UnboundMsg
+
+multiple_targets_msg = "Multiple targets are not supported."
+_multiple_targets_data = UnboundMsg(multiple_targets_msg)
+
+
+def check_multiple_targets_input(f):
+    """
+    Wrapper for a widget's set_data method that checks whether the input
+    has multiple targets and shows an error if it does.
+
+    :param f: widget's `set_data` method to wrap
+    :return: wrapped method that handles multiple-target data inputs
+    """
+
+    @wraps(f)
+    def new_f(widget, data, *args, **kwargs):
+        widget.Error.add_message("multiple_targets_data",
+                                 _multiple_targets_data)
+        widget.Error.multiple_targets_data.clear()
+        if data is not None and len(data.domain.class_vars) > 1:
+            widget.Error.multiple_targets_data()
+            data = None
+        return f(widget, data, *args, **kwargs)
+
+    return new_f
diff --git a/Orange/widgets/utils/tests/test_multi_target.py b/Orange/widgets/utils/tests/test_multi_target.py
new file mode 100644
index 00000000000..93237922e9a
--- /dev/null
+++ b/Orange/widgets/utils/tests/test_multi_target.py
@@ -0,0 +1,52 @@
+import unittest
+
+from Orange.data import Table, Domain, DiscreteVariable
+from Orange.widgets.tests.base import WidgetTest
+from Orange.widgets.utils.signals import Input
+from Orange.widgets.utils.multi_target import check_multiple_targets_input
+from Orange.widgets.widget import OWWidget
+
+
+class TestMultiTargetDecorator(WidgetTest):
+    class MockWidget(OWWidget):
+        name = "MockWidget"
+
+        NotCalled = object()
+
+        class Inputs:
+            data = Input("Data", Table)
+
+        def __init__(self, *args, **kwargs):
+            super().__init__(*args, **kwargs)
+            self.called_with = self.NotCalled
+
+        @Inputs.data
+        @check_multiple_targets_input
+        def set_data(self, obj):
+            self.called_with = obj
+
+        def pop_called_with(self):
+            t = self.called_with
+            self.called_with = self.NotCalled
+            return t
+
+    def setUp(self):
+        self.widget = self.create_widget(self.MockWidget)
+        self.data = Table("iris")
+
+    def test_check_multiple_targets_input(self):
+        class_vars = [self.data.domain.class_var,
+                      DiscreteVariable("c1", values=("a", "b"))]
+        domain = Domain(self.data.domain.attributes, class_vars=class_vars)
+        multiple_targets_data = self.data.transform(domain)
+        self.send_signal(self.widget.Inputs.data, multiple_targets_data)
+        self.assertTrue(self.widget.Error.multiple_targets_data.is_shown())
+        self.assertIs(self.widget.pop_called_with(), None)
+
+        self.send_signal(self.widget.Inputs.data, self.data)
+        self.assertFalse(self.widget.Error.multiple_targets_data.is_shown())
+        self.assertIs(self.widget.pop_called_with(), self.data)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/doc/visual-programming/source/index.rst b/doc/visual-programming/source/index.rst
index e14eb32218d..e90d19c1bc8 100644
--- a/doc/visual-programming/source/index.rst
+++ b/doc/visual-programming/source/index.rst
@@ -108,6 +108,7 @@ Model
    widgets/model/gradientboosting
    widgets/model/svm
    widgets/model/linearregression
+   widgets/model/pls
    widgets/model/logisticregression
    widgets/model/naivebayes
    widgets/model/adaboost
diff --git a/doc/visual-programming/source/widgets/model/images/PLS-Example.png b/doc/visual-programming/source/widgets/model/images/PLS-Example.png
new file mode 100644
index 00000000000..d635d86f0d0
Binary files /dev/null and b/doc/visual-programming/source/widgets/model/images/PLS-Example.png differ
diff --git a/doc/visual-programming/source/widgets/model/images/PLS-stamped.png b/doc/visual-programming/source/widgets/model/images/PLS-stamped.png
new file mode 100644
index 00000000000..9fe4e4c9308
Binary files /dev/null and b/doc/visual-programming/source/widgets/model/images/PLS-stamped.png differ
diff --git a/doc/visual-programming/source/widgets/model/pls.md b/doc/visual-programming/source/widgets/model/pls.md
new file mode 100644
index 00000000000..0e46d27c04c
--- /dev/null
+++ b/doc/visual-programming/source/widgets/model/pls.md
@@ -0,0 +1,33 @@
+# PLS
+
+Partial Least Squares Regression widget for multivariate data analysis.
+
+**Inputs**
+
+- Data: input dataset
+- Preprocessor: preprocessing method(s)
+
+**Outputs**
+
+- Learner: PLS regression learning algorithm
+- Model: trained model
+- Coefficients: PLS regression coefficients
+
+The **PLS** (Partial Least Squares) widget is a regressor for data with one or more numeric target variables. In its current implementation it is similar to linear regression, but with a different kind of regularization: regularization is controlled by the number of components - the more components, the smaller the effect of regularization.
+
+The PLS widget can output coefficients, just like [Linear Regression](../model/linearregression.md), so the effect of each variable can be observed in a [Data Table](../data/datatable.md).
+
+![](images/PLS-stamped.png)
+
+1. The learner/predictor name
+2. Parameters:
+   - Components: the number of components of the model, which acts as regularization (the more components, the less regularization)
+   - Iteration limit: the maximum number of iterations before the algorithm stops
+3. Press *Apply* to commit changes. If *Apply Automatically* is ticked, changes are committed automatically.
+
+Example
+-------
+
+Below is a simple workflow with the *housing* dataset. We train **PLS** and [Linear Regression](../model/linearregression.md) and evaluate their performance in [Test & Score](../evaluate/testandscore.md).
+
+![](images/PLS-Example.png)
diff --git a/doc/widgets.json b/doc/widgets.json
index 388b2efb69a..c7e27fc886b 100644
--- a/doc/widgets.json
+++ b/doc/widgets.json
@@ -722,6 +722,15 @@
                 "boost"
             ]
         },
+        {
+            "text": "PLS",
+            "doc": null,
+            "icon": "../Orange/widgets/model/icons/PLS.svg",
+            "background": "#FAC1D9",
+            "keywords": [
+                "partial least squares"
+            ]
+        },
        {
            "text": "Curve Fit",
            "doc": "visual-programming/source/widgets/model/curvefit.md",
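
As a supplement to the `pls.md` documentation above, the sketch below exercises the Python API that this patch introduces in `Orange/regression/pls.py` (`PLSRegressionLearner`, `project`, `components`, `coefficients_table`). It mirrors the module's `__main__` block and assumes the *housing* dataset bundled with Orange; it is an illustrative usage example, not part of the patch itself.

```python
import Orange
from Orange.regression import PLSRegressionLearner

# Fit the PLS learner on the bundled housing data (single numeric target).
housing = Orange.data.Table("housing")
model = PLSRegressionLearner(n_components=2, max_iter=100)(housing)

# The projection adds "PLS T*" (X-score) and "PLS U*" (Y-score) columns.
projection = model.project(housing)
print(projection.domain)

# Loadings and coefficients are exposed as Orange tables,
# matching the widget's Components and Coefficients outputs.
print(model.components())
print(model.coefficients_table())

# Cross-validated RMSE, as in the module's __main__ block.
learners = [PLSRegressionLearner(n_components=2)]
results = Orange.evaluation.CrossValidation()(housing, learners)
print(Orange.evaluation.RMSE(results))
```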