Skip to content

Commit 6a3d8c2

Browse files
committed
OWEditDomain: fix merge variables when missing data
1 parent db4e7ce commit 6a3d8c2

File tree

2 files changed

+49
-3
lines changed

2 files changed

+49
-3
lines changed

Orange/widgets/data/oweditdomain.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
Qt, QEvent, QSize, QModelIndex, QAbstractItemModel, QPersistentModelIndex
3030
)
3131
from AnyQt.QtCore import pyqtSignal as Signal, pyqtSlot as Slot
32+
from numpy.ma import MaskedArray
3233

3334
import Orange.data
3435

@@ -680,7 +681,8 @@ class GroupItemsDialog(QDialog):
680681
DEFAULT_LABEL = "other"
681682

682683
def __init__(
683-
self, variable: Categorical, data: Union[np.ndarray, List],
684+
self, variable: Categorical,
685+
data: Union[np.ndarray, List, MaskedArray],
684686
selected_attributes: List[str], dialog_settings: Dict[str, Any],
685687
parent: QWidget = None, flags: Qt.WindowFlags = Qt.Dialog, **kwargs
686688
) -> None:
@@ -814,10 +816,18 @@ def get_merge_attributes(self) -> List[str]:
814816
-------
815817
List of names of the attributes to be merged
816818
"""
817-
counts = Counter(self.data)
818819
if self.selected_radio.isChecked():
819820
return self.selected_attributes
820-
elif self.n_values_radio.isChecked():
821+
822+
if isinstance(self.data, MaskedArray):
823+
non_nan = self.data[~self.data.mask]
824+
elif isinstance(self.data, np.ndarray):
825+
non_nan = self.data[~np.isnan(self.data)]
826+
else: # list
827+
non_nan = [x for x in self.data if x is not None]
828+
829+
counts = Counter(non_nan)
830+
if self.n_values_radio.isChecked():
821831
keep_values = self.n_values_spin.value()
822832
values = counts.most_common()[keep_values:]
823833
indices = [i for i, _ in values]
@@ -828,6 +838,8 @@ def get_merge_attributes(self) -> List[str]:
828838
n_all = sum(counts.values())
829839
indices = [v for v, c in counts.most_common()
830840
if c / n_all * 100 < self.frequent_rel_spin.value()]
841+
842+
indices = np.array(indices, dtype=int) # indices must be ints
831843
return np.array(self.variable.categories)[indices].tolist()
832844

833845
def get_merged_value_name(self) -> str:

Orange/widgets/data/tests/test_oweditdomain.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,6 +1026,40 @@ def test_group_keep_n(self):
10261026
dialog.n_values_spin.setValue(3)
10271027
self.assertListEqual(dialog.get_merge_attributes(), [])
10281028

1029+
def test_group_less_frequent_missing(self):
1030+
"""
1031+
The widget gives GroupItemsDialog a MaskedArray, which can have missing
1032+
values.
1033+
gh-4599
1034+
"""
1035+
def _test_correctness():
1036+
dialog.frequent_abs_radio.setChecked(True)
1037+
dialog.frequent_abs_spin.setValue(3)
1038+
self.assertListEqual(dialog.get_merge_attributes(), ["b", "c"])
1039+
1040+
dialog.frequent_rel_radio.setChecked(True)
1041+
dialog.frequent_rel_spin.setValue(50)
1042+
self.assertListEqual(dialog.get_merge_attributes(), ["b", "c"])
1043+
1044+
dialog.n_values_radio.setChecked(True)
1045+
dialog.n_values_spin.setValue(1)
1046+
self.assertListEqual(dialog.get_merge_attributes(), ["b", "c"])
1047+
1048+
# masked array
1049+
data_masked = np.ma.array(
1050+
[0, 0, np.nan, 0, 1, 1, 2], mask=[0, 0, 1, 0, 0, 0, 0]
1051+
)
1052+
dialog = GroupItemsDialog(self.v, data_masked, [], {})
1053+
_test_correctness()
1054+
1055+
data_array = np.array([0, 0, np.nan, 0, 1, 1, 2])
1056+
dialog = GroupItemsDialog(self.v, data_array, [], {})
1057+
_test_correctness()
1058+
1059+
data_list = [0, 0, None, 0, 1, 1, 2]
1060+
dialog = GroupItemsDialog(self.v, data_list, [], {})
1061+
_test_correctness()
1062+
10291063

10301064
if __name__ == '__main__':
10311065
unittest.main()

0 commit comments

Comments
 (0)