Skip to content

Commit 94fa21a

Browse files
committed
OWEditDomain: fix merge variables when missing data
1 parent db4e7ce commit 94fa21a

File tree

2 files changed

+48
-3
lines changed

2 files changed

+48
-3
lines changed

Orange/widgets/data/oweditdomain.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -680,7 +680,8 @@ class GroupItemsDialog(QDialog):
680680
DEFAULT_LABEL = "other"
681681

682682
def __init__(
683-
self, variable: Categorical, data: Union[np.ndarray, List],
683+
self, variable: Categorical,
684+
data: Union[np.ndarray, List, MArray],
684685
selected_attributes: List[str], dialog_settings: Dict[str, Any],
685686
parent: QWidget = None, flags: Qt.WindowFlags = Qt.Dialog, **kwargs
686687
) -> None:
@@ -814,10 +815,18 @@ def get_merge_attributes(self) -> List[str]:
814815
-------
815816
List of the names of the attributes to be merged
816817
"""
817-
counts = Counter(self.data)
818818
if self.selected_radio.isChecked():
819819
return self.selected_attributes
820-
elif self.n_values_radio.isChecked():
820+
821+
if isinstance(self.data, MArray):
822+
non_nan = self.data[~self.data.mask]
823+
elif isinstance(self.data, np.ndarray):
824+
non_nan = self.data[~np.isnan(self.data)]
825+
else: # list
826+
non_nan = [x for x in self.data if x is not None]
827+
828+
counts = Counter(non_nan)
829+
if self.n_values_radio.isChecked():
821830
keep_values = self.n_values_spin.value()
822831
values = counts.most_common()[keep_values:]
823832
indices = [i for i, _ in values]
@@ -828,6 +837,8 @@ def get_merge_attributes(self) -> List[str]:
828837
n_all = sum(counts.values())
829838
indices = [v for v, c in counts.most_common()
830839
if c / n_all * 100 < self.frequent_rel_spin.value()]
840+
841+
indices = np.array(indices, dtype=int) # indices must be ints
831842
return np.array(self.variable.categories)[indices].tolist()
832843

833844
def get_merged_value_name(self) -> str:

Orange/widgets/data/tests/test_oweditdomain.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,6 +1026,40 @@ def test_group_keep_n(self):
10261026
dialog.n_values_spin.setValue(3)
10271027
self.assertListEqual(dialog.get_merge_attributes(), [])
10281028

1029+
def test_group_less_frequent_missing(self):
1030+
"""
1031+
The widget passes GroupItemsDialog a MaskedArray, which can contain
1032+
missing values.
1033+
gh-4599
1034+
"""
1035+
def _test_correctness():
1036+
dialog.frequent_abs_radio.setChecked(True)
1037+
dialog.frequent_abs_spin.setValue(3)
1038+
self.assertListEqual(dialog.get_merge_attributes(), ["b", "c"])
1039+
1040+
dialog.frequent_rel_radio.setChecked(True)
1041+
dialog.frequent_rel_spin.setValue(50)
1042+
self.assertListEqual(dialog.get_merge_attributes(), ["b", "c"])
1043+
1044+
dialog.n_values_radio.setChecked(True)
1045+
dialog.n_values_spin.setValue(1)
1046+
self.assertListEqual(dialog.get_merge_attributes(), ["b", "c"])
1047+
1048+
# masked array
1049+
data_masked = np.ma.array(
1050+
[0, 0, np.nan, 0, 1, 1, 2], mask=[0, 0, 1, 0, 0, 0, 0]
1051+
)
1052+
dialog = GroupItemsDialog(self.v, data_masked, [], {})
1053+
_test_correctness()
1054+
1055+
data_array = np.array([0, 0, np.nan, 0, 1, 1, 2])
1056+
dialog = GroupItemsDialog(self.v, data_array, [], {})
1057+
_test_correctness()
1058+
1059+
data_list = [0, 0, None, 0, 1, 1, 2]
1060+
dialog = GroupItemsDialog(self.v, data_list, [], {})
1061+
_test_correctness()
1062+
10291063

10301064
if __name__ == '__main__':
10311065
unittest.main()

0 commit comments

Comments
 (0)