@@ -34,10 +34,9 @@ def test_table_from_frame(self):
3434 [0 , pd .Timestamp ('1724-12-20' ).timestamp ()],
3535 [0 , pd .Timestamp ('1724-12-20' ).timestamp ()],
3636 [nan , nan ]])
37- np .testing .assert_equal (table .metas .tolist (), [['a' ],
38- ['b' ],
39- ['c' ],
40- [nan ]])
37+ np .testing .assert_equal (
38+ table .metas .tolist (), [["a" ], ["b" ], ["c" ], [StringVariable .Unknown ]]
39+ )
4140 names = [var .name for var in table .domain .attributes ]
4241 types = [type (var ) for var in table .domain .attributes ]
4342 self .assertEqual (names , ['1' , '2' ])
@@ -383,7 +382,7 @@ def test_table_from_frame_timezones(self):
383382 ],
384383 )
385384
386- def test_table_from_frame_no_datetim (self ):
385+ def test_table_from_frame_no_datetime (self ):
387386 """
388387 In case when dtype of column is object and column contains numbers only,
389388 column could be recognized as a TimeVariable since pd.to_datetime can parse
@@ -402,6 +401,25 @@ def test_table_from_frame_no_datetim(self):
402401 # check if exactly ContinuousVariable and not subtype TimeVariable
403402 self .assertIsInstance (table .domain .attributes [0 ], DiscreteVariable )
404403
404+ def testa_table_from_frame_string (self ):
405+ """
406+ Test that string-like columns become StringVariable metas and that nans are replaced with
407+ StringVariable.Unknown (empty string). NOTE(review): "testa_" is probably a typo for "test_".
408+ """
409+ from Orange .data .pandas_compat import table_from_frame
410+
411+ # s1 (object dtype) contains nan; s2 (string dtype) presumably holds pd.NA after astype
412+ df = pd .DataFrame (
413+ [["a" , "b" ], ["c" , "d" ], ["e" , "f" ], [np .nan , np .nan ]],
414+ columns = ["s1" , "s2" ],
415+ ).astype ({"s1" : "object" , "s2" : "string" })
416+ table = table_from_frame (df )
417+ np .testing .assert_array_equal (np .empty ((4 , 0 )), table .X )
418+ np .testing .assert_array_equal (
419+ np .array ([["a" , "b" ], ["c" , "d" ], ["e" , "f" ], [StringVariable .Unknown , StringVariable .Unknown ]]), table .metas
420+ )
421+ self .assertTrue (all (isinstance (v , StringVariable ) for v in table .domain .metas ))
422+
405423 def test_time_variable_compatible (self ):
406424 from Orange .data .pandas_compat import table_from_frame
407425
0 commit comments