Skip to content

Commit 8b5b505

Browse files
committed
table_from_frame: replace nan with "" for string variable
1 parent c860359 commit 8b5b505

File tree

2 files changed

+20
-2
lines changed

2 files changed

+20
-2
lines changed

Orange/data/pandas_compat.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ def vars_from_df(df, role=None, force_nominal=False):
280280
raise ValueError("String variable must be in metas.")
281281
_role = Role.Meta
282282
var = StringVariable(str(column))
283-
expr = lambda s, _: np.asarray(s, dtype=object)
283+
expr = lambda s, _: np.asarray(s.fillna(""), dtype=object)
284284

285285
cols[_role].append(column)
286286
exprs[_role].append(expr)

Orange/data/tests/test_pandas.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,7 @@ def test_table_from_frame_timezones(self):
383383
],
384384
)
385385

386-
def test_table_from_frame_no_datetim(self):
386+
def test_table_from_frame_no_datetime(self):
387387
"""
388388
In case when dtype of column is object and column contains numbers only,
389389
column could be recognized as a TimeVariable since pd.to_datetime can parse
@@ -402,6 +402,24 @@ def test_table_from_frame_no_datetim(self):
402402
# check that the variable is a DiscreteVariable and not a TimeVariable
403403
self.assertIsInstance(table.domain.attributes[0], DiscreteVariable)
404404

405+
def testa_table_from_frame_string(self):
406+
"""
407+
Test if string-like variables are handled correctly and nans are replaced
408+
with an empty string - the unknown value for a string variable in an Orange table
409+
"""
410+
from Orange.data.pandas_compat import table_from_frame
411+
412+
# s1 (object dtype) contains np.nan and s2 (string dtype) contains pd.NA
413+
df = pd.DataFrame(
414+
[["a", "b"], ["c", "d"], ["e", "f"], [np.nan, np.nan]],
415+
columns=["s1", "s2"],
416+
).astype({"s1": "object", "s2": "string"})
417+
table = table_from_frame(df)
418+
np.testing.assert_array_equal(np.empty((4, 0)), table.X)
419+
np.testing.assert_array_equal(
420+
np.array([["a", "b"], ["c", "d"], ["e", "f"], ["", ""]]), table.metas
421+
)
422+
405423
def test_time_variable_compatible(self):
406424
from Orange.data.pandas_compat import table_from_frame
407425

0 commit comments

Comments
 (0)