Skip to content

Commit 4cb5f1f

Browse files
committed
Transform: Replace 'Preprocess' input with 'Template Data' input
1 parent eca8c46 commit 4cb5f1f

File tree

2 files changed

+103
-84
lines changed

2 files changed

+103
-84
lines changed

Orange/widgets/data/owtransform.py

Lines changed: 54 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,10 @@
1+
from typing import Optional
2+
3+
import numpy as np
4+
15
from Orange.data import Table, Domain
2-
from Orange.preprocess.preprocess import Preprocess, Discretize
36
from Orange.widgets import gui
7+
from Orange.widgets.report.report import describe_data
48
from Orange.widgets.settings import Setting
59
from Orange.widgets.utils.sql import check_sql_input
610
from Orange.widgets.utils.widgetpreview import WidgetPreview
@@ -18,55 +22,56 @@ class OWTransform(OWWidget):
1822

1923
class Inputs:
2024
data = Input("Data", Table, default=True)
21-
preprocessor = Input("Preprocessor", Preprocess)
25+
template_data = Input("Template Data", Table)
2226

2327
class Outputs:
2428
transformed_data = Output("Transformed Data", Table)
2529

2630
class Error(OWWidget.Error):
27-
pp_error = Msg("An error occurred while transforming data.\n{}")
31+
error = Msg("An error occurred while transforming data.\n{}")
2832

2933
resizing_enabled = False
3034
want_main_area = False
3135

3236
def __init__(self):
3337
super().__init__()
34-
self.data = None
35-
self.preprocessor = None
36-
self.transformed_data = None
38+
self.data = None # type: Optional[Table]
39+
self.template_domain = None # type: Optional[Domain]
40+
self.transformed_info = describe_data(None) # type: OrderedDict
3741

3842
info_box = gui.widgetBox(self.controlArea, "Info")
3943
self.input_label = gui.widgetLabel(info_box, "")
40-
self.preprocessor_label = gui.widgetLabel(info_box, "")
44+
self.template_label = gui.widgetLabel(info_box, "")
4145
self.output_label = gui.widgetLabel(info_box, "")
4246
self.set_input_label_text()
43-
self.set_preprocessor_label_text()
47+
self.set_template_label_text()
4448

4549
box = gui.widgetBox(self.controlArea, "Output")
4650
gui.checkBox(box, self, "retain_all_data", "Retain all data",
4751
callback=self.apply)
4852

4953
def set_input_label_text(self):
5054
text = "No data on input."
51-
if self.data is not None:
55+
if self.data:
5256
text = "Input data with {:,} instances and {:,} features.".format(
5357
len(self.data),
5458
len(self.data.domain.attributes))
5559
self.input_label.setText(text)
5660

57-
def set_preprocessor_label_text(self):
58-
text = "No preprocessor on input."
59-
if self.transformed_data is not None:
60-
text = "Preprocessor {} applied.".format(self.preprocessor)
61-
elif self.preprocessor is not None:
62-
text = "Preprocessor {} on input.".format(self.preprocessor)
63-
self.preprocessor_label.setText(text)
61+
def set_template_label_text(self):
62+
text = "No template data on input."
63+
if self.data and self.template_domain is not None:
64+
text = "Template domain applied."
65+
elif self.template_domain is not None:
66+
text = "Template data includes {:,} features.".format(
67+
len(self.template_domain.attributes))
68+
self.template_label.setText(text)
6469

65-
def set_output_label_text(self):
70+
def set_output_label_text(self, data):
6671
text = ""
67-
if self.transformed_data:
72+
if data:
6873
text = "Output data includes {:,} features.".format(
69-
len(self.transformed_data.domain.attributes))
74+
len(data.domain.attributes))
7075
self.output_label.setText(text)
7176

7277
@Inputs.data
@@ -75,49 +80,53 @@ def set_data(self, data):
7580
self.data = data
7681
self.set_input_label_text()
7782

78-
@Inputs.preprocessor
79-
def set_preprocessor(self, preprocessor):
80-
self.preprocessor = preprocessor
83+
@Inputs.template_data
84+
@check_sql_input
85+
def set_template_data(self, data):
86+
self.template_domain = data and data.domain
8187

8288
def handleNewSignals(self):
8389
self.apply()
8490

8591
def apply(self):
8692
self.clear_messages()
87-
self.transformed_data = None
88-
if self.data is not None and self.preprocessor is not None:
93+
transformed_data = None
94+
if self.data and self.template_domain is not None:
8995
try:
90-
self.transformed_data = self.preprocessor(self.data)
91-
except Exception as ex: # pylint: disable=broad-except
92-
self.Error.pp_error(ex)
93-
94-
data = self.merged_data() if self.retain_all_data \
95-
else self.transformed_data
96+
transformed_data = self.data.transform(self.template_domain)
97+
except Exception as ex: # pylint: disable=broad-except
98+
self.Error.error(ex)
99+
100+
data = transformed_data
101+
if data and self.retain_all_data:
102+
data = self.merged_data(data)
103+
self.transformed_info = describe_data(data)
96104
self.Outputs.transformed_data.send(data)
105+
self.set_template_label_text()
106+
self.set_output_label_text(data)
97107

98-
self.set_preprocessor_label_text()
99-
self.set_output_label_text()
100-
101-
def merged_data(self):
108+
def merged_data(self, t_data):
102109
domain = self.data.domain
103-
metas = domain.metas + self.transformed_data.domain.attributes
110+
t_domain = t_data.domain
111+
metas = domain.metas + t_domain.attributes + t_domain.metas
104112
domain = Domain(domain.attributes, domain.class_vars, metas)
105113
data = self.data.transform(domain)
106-
n = self.transformed_data.X.shape[1]
107-
data.metas[:, -n:] = self.transformed_data.X
114+
metas = np.hstack((t_data.X, t_data.metas))
115+
data.metas[:, -metas.shape[1]:] = metas
108116
return data
109117

110118
def send_report(self):
111-
if self.preprocessor is not None:
112-
self.report_items("Settings",
113-
(("Preprocessor", self.preprocessor),))
114-
if self.data is not None:
119+
if self.data:
115120
self.report_data("Data", self.data)
116-
if self.transformed_data is not None:
117-
self.report_data("Transformed data", self.transformed_data)
121+
if self.template_domain is not None:
122+
self.report_domain("Template data", self.template_domain)
123+
if self.transformed_info:
124+
self.report_items("Transformed data", self.transformed_info)
118125

119126

120127
if __name__ == "__main__": # pragma: no cover
128+
from Orange.preprocess import Discretize
129+
130+
table = Table("iris")
121131
WidgetPreview(OWTransform).run(
122-
set_data=Table("iris"),
123-
set_preprocessor=Discretize())
132+
set_data=table, set_template_data=Discretize()(table))

Orange/widgets/data/tests/test_owtransform.py

Lines changed: 49 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,11 @@
11
# Test methods with long descriptive names can omit docstrings
22
# pylint: disable=missing-docstring
3+
from unittest.mock import Mock
4+
5+
from numpy import testing as npt
6+
37
from Orange.data import Table
48
from Orange.preprocess import Discretize, Continuize
5-
from Orange.preprocess.preprocess import Preprocess
69
from Orange.widgets.data.owtransform import OWTransform
710
from Orange.widgets.tests.base import WidgetTest
811
from Orange.widgets.unsupervised.owpca import OWPCA
@@ -12,39 +15,39 @@ class TestOWTransform(WidgetTest):
1215
def setUp(self):
1316
self.widget = self.create_widget(OWTransform)
1417
self.data = Table("iris")
15-
self.preprocessor = Discretize()
18+
self.disc_data = Discretize()(self.data)
1619

1720
def test_output(self):
18-
# send data and preprocessor
19-
self.send_signal(self.widget.Inputs.data, self.data)
20-
self.send_signal(self.widget.Inputs.preprocessor, self.preprocessor)
21+
# send data and template data
22+
self.send_signal(self.widget.Inputs.data, self.data[::15])
23+
self.send_signal(self.widget.Inputs.template_data, self.disc_data)
2124
output = self.get_output(self.widget.Outputs.transformed_data)
22-
self.assertIsInstance(output, Table)
23-
self.assertEqual("Input data with 150 instances and 4 features.",
25+
self.assertTableEqual(output, self.disc_data[::15])
26+
self.assertEqual("Input data with 10 instances and 4 features.",
2427
self.widget.input_label.text())
25-
self.assertEqual("Preprocessor Discretize() applied.",
26-
self.widget.preprocessor_label.text())
28+
self.assertEqual("Template domain applied.",
29+
self.widget.template_label.text())
2730
self.assertEqual("Output data includes 4 features.",
2831
self.widget.output_label.text())
2932

30-
# remove preprocessor
31-
self.send_signal(self.widget.Inputs.preprocessor, None)
33+
# remove template data
34+
self.send_signal(self.widget.Inputs.template_data, None)
3235
output = self.get_output(self.widget.Outputs.transformed_data)
3336
self.assertIsNone(output)
34-
self.assertEqual("Input data with 150 instances and 4 features.",
37+
self.assertEqual("Input data with 10 instances and 4 features.",
3538
self.widget.input_label.text())
36-
self.assertEqual("No preprocessor on input.",
37-
self.widget.preprocessor_label.text())
39+
self.assertEqual("No template data on input.",
40+
self.widget.template_label.text())
3841
self.assertEqual("", self.widget.output_label.text())
3942

40-
# send preprocessor
41-
self.send_signal(self.widget.Inputs.preprocessor, self.preprocessor)
43+
# send template data
44+
self.send_signal(self.widget.Inputs.template_data, self.disc_data)
4245
output = self.get_output(self.widget.Outputs.transformed_data)
43-
self.assertIsInstance(output, Table)
44-
self.assertEqual("Input data with 150 instances and 4 features.",
46+
self.assertTableEqual(output, self.disc_data[::15])
47+
self.assertEqual("Input data with 10 instances and 4 features.",
4548
self.widget.input_label.text())
46-
self.assertEqual("Preprocessor Discretize() applied.",
47-
self.widget.preprocessor_label.text())
49+
self.assertEqual("Template domain applied.",
50+
self.widget.template_label.text())
4851
self.assertEqual("Output data includes 4 features.",
4952
self.widget.output_label.text())
5053

@@ -53,48 +56,55 @@ def test_output(self):
5356
output = self.get_output(self.widget.Outputs.transformed_data)
5457
self.assertIsNone(output)
5558
self.assertEqual("No data on input.", self.widget.input_label.text())
56-
self.assertEqual("Preprocessor Discretize() on input.",
57-
self.widget.preprocessor_label.text())
59+
self.assertEqual("Template data includes 4 features.",
60+
self.widget.template_label.text())
5861
self.assertEqual("", self.widget.output_label.text())
5962

60-
# remove preprocessor
61-
self.send_signal(self.widget.Inputs.preprocessor, None)
63+
# remove template data
64+
self.send_signal(self.widget.Inputs.template_data, None)
6265
self.assertEqual("No data on input.", self.widget.input_label.text())
63-
self.assertEqual("No preprocessor on input.",
64-
self.widget.preprocessor_label.text())
66+
self.assertEqual("No template data on input.",
67+
self.widget.template_label.text())
6568
self.assertEqual("", self.widget.output_label.text())
6669

67-
def test_input_pca_preprocessor(self):
70+
def assertTableEqual(self, table1, table2):
71+
self.assertIs(table1.domain, table2.domain)
72+
npt.assert_array_equal(table1.X, table2.X)
73+
npt.assert_array_equal(table1.Y, table2.Y)
74+
npt.assert_array_equal(table1.metas, table2.metas)
75+
76+
def test_input_pca_output(self):
6877
owpca = self.create_widget(OWPCA)
6978
self.send_signal(owpca.Inputs.data, self.data, widget=owpca)
7079
owpca.components_spin.setValue(2)
71-
pp = self.get_output(owpca.Outputs.preprocessor, widget=owpca)
72-
self.assertIsNotNone(pp, Preprocess)
80+
pca_out = self.get_output(owpca.Outputs.transformed_data, widget=owpca)
7381

74-
self.send_signal(self.widget.Inputs.data, self.data)
75-
self.send_signal(self.widget.Inputs.preprocessor, pp)
82+
self.send_signal(self.widget.Inputs.data, self.data[::10])
83+
self.send_signal(self.widget.Inputs.template_data, pca_out)
7684
output = self.get_output(self.widget.Outputs.transformed_data)
77-
self.assertIsInstance(output, Table)
78-
self.assertEqual(output.X.shape, (len(self.data), 2))
85+
npt.assert_array_equal(pca_out.X[::10], output.X)
7986

8087
def test_retain_all_data(self):
8188
data = Table("zoo")
89+
cont_data = Continuize()(data)
8290
self.send_signal(self.widget.Inputs.data, data)
83-
self.send_signal(self.widget.Inputs.preprocessor, Continuize())
91+
self.send_signal(self.widget.Inputs.template_data, cont_data)
8492
self.widget.controls.retain_all_data.click()
8593
output = self.get_output(self.widget.Outputs.transformed_data)
8694
self.assertIsInstance(output, Table)
8795
self.assertEqual(output.X.shape, (len(data), 16))
88-
self.assertEqual(output.metas.shape, (len(data), 37))
96+
self.assertEqual(output.metas.shape, (len(data), 38))
8997

9098
def test_error_transforming(self):
91-
self.send_signal(self.widget.Inputs.data, self.data)
92-
self.send_signal(self.widget.Inputs.preprocessor, Preprocess())
93-
self.assertTrue(self.widget.Error.pp_error.is_shown())
99+
data = self.data[::10]
100+
data.transform = Mock(side_effect=Exception())
101+
self.send_signal(self.widget.Inputs.data, data)
102+
self.send_signal(self.widget.Inputs.template_data, self.disc_data)
103+
self.assertTrue(self.widget.Error.error.is_shown())
94104
output = self.get_output(self.widget.Outputs.transformed_data)
95105
self.assertIsNone(output)
96106
self.send_signal(self.widget.Inputs.data, None)
97-
self.assertFalse(self.widget.Error.pp_error.is_shown())
107+
self.assertFalse(self.widget.Error.error.is_shown())
98108

99109
def test_send_report(self):
100110
self.send_signal(self.widget.Inputs.data, self.data)

0 commit comments

Comments
 (0)