@@ -34,10 +34,9 @@ def test_table_from_frame(self):
3434 [0 , pd .Timestamp ('1724-12-20' ).timestamp ()],
3535 [0 , pd .Timestamp ('1724-12-20' ).timestamp ()],
3636 [nan , nan ]])
37- np .testing .assert_equal (table .metas .tolist (), [['a' ],
38- ['b' ],
39- ['c' ],
40- [nan ]])
37+ np .testing .assert_equal (
38+ table .metas .tolist (), [["a" ], ["b" ], ["c" ], [StringVariable .Unknown ]]
39+ )
4140 names = [var .name for var in table .domain .attributes ]
4241 types = [type (var ) for var in table .domain .attributes ]
4342 self .assertEqual (names , ['1' , '2' ])
@@ -383,7 +382,7 @@ def test_table_from_frame_timezones(self):
383382 ],
384383 )
385384
386- def test_table_from_frame_no_datetim (self ):
385+ def test_table_from_frame_no_datetime (self ):
387386 """
388387 In case when dtype of column is object and column contains numbers only,
389388 column could be recognized as a TimeVariable since pd.to_datetime can parse
@@ -402,6 +401,33 @@ def test_table_from_frame_no_datetim(self):
402401 # check if exactly ContinuousVariable and not subtype TimeVariable
403402 self .assertIsInstance (table .domain .attributes [0 ], DiscreteVariable )
404403
def testa_table_from_frame_string(self):
    """
    Check conversion of string-like DataFrame columns to an Orange table.

    Both an ``object`` column and a pandas ``string`` column must end up as
    StringVariable metas, and their missing entries must be stored as
    ``StringVariable.Unknown``. No column may land in ``X``.
    """
    from Orange.data.pandas_compat import table_from_frame

    # Same raw rows for both columns; only the dtype differs after the cast:
    # s1 (object) keeps np.nan, s2 (string) holds pd.NA for the missing entry.
    rows = [["a", "b"], ["c", "d"], ["e", "f"], [np.nan, np.nan]]
    frame = pd.DataFrame(rows, columns=["s1", "s2"])
    frame = frame.astype({"s1": "object", "s2": "string"})

    table = table_from_frame(frame)

    # String columns are metas, so the attribute matrix has no columns.
    np.testing.assert_array_equal(np.empty((4, 0)), table.X)

    missing = StringVariable.Unknown
    expected_metas = np.array(
        [
            ["a", "b"],
            ["c", "d"],
            ["e", "f"],
            [missing, missing],
        ]
    )
    np.testing.assert_array_equal(expected_metas, table.metas)

    only_string_metas = all(
        isinstance(meta_var, StringVariable) for meta_var in table.domain.metas
    )
    self.assertTrue(only_string_metas)
430+
405431 def test_time_variable_compatible (self ):
406432 from Orange .data .pandas_compat import table_from_frame
407433
0 commit comments