@@ -34,10 +34,9 @@ def test_table_from_frame(self):
3434 [0 , pd .Timestamp ('1724-12-20' ).timestamp ()],
3535 [0 , pd .Timestamp ('1724-12-20' ).timestamp ()],
3636 [nan , nan ]])
37- np .testing .assert_equal (table .metas .tolist (), [['a' ],
38- ['b' ],
39- ['c' ],
40- [nan ]])
37+ np .testing .assert_equal (
38+ table .metas .tolist (), [["a" ], ["b" ], ["c" ], [StringVariable .Unknown ]]
39+ )
4140 names = [var .name for var in table .domain .attributes ]
4241 types = [type (var ) for var in table .domain .attributes ]
4342 self .assertEqual (names , ['1' , '2' ])
@@ -63,10 +62,9 @@ def test_table_from_frame(self):
6362 [1 , 0 , pd .Timestamp ('1724-12-20' ).timestamp ()],
6463 [0 , 0 , pd .Timestamp ('1724-12-20' ).timestamp ()],
6564 [0 , nan , nan ]])
66- np .testing .assert_equal (table .metas .tolist (), [['a' ],
67- ['b' ],
68- ['c' ],
69- [nan ]])
65+ np .testing .assert_equal (
66+ table .metas .tolist (), [["a" ], ["b" ], ["c" ], [StringVariable .Unknown ]]
67+ )
7068 names = [var .name for var in table .domain .attributes ]
7169 types = [type (var ) for var in table .domain .attributes ]
7270 self .assertEqual (names , ['index' , '1' , '2' ])
@@ -383,7 +381,7 @@ def test_table_from_frame_timezones(self):
383381 ],
384382 )
385383
386- def test_table_from_frame_no_datetim (self ):
384+ def test_table_from_frame_no_datetime (self ):
387385 """
388386 In case when dtype of column is object and column contains numbers only,
389387 column could be recognized as a TimeVariable since pd.to_datetime can parse
@@ -402,6 +400,34 @@ def test_table_from_frame_no_datetim(self):
402400 # check that the column is a DiscreteVariable and not a TimeVariable
403401 self .assertIsInstance (table .domain .attributes [0 ], DiscreteVariable )
404402
403+ def testa_table_from_frame_string (self ):
404+ """
405+ Test if string-like variables are handled correctly and nans are replaced
406+ with StringVariable.Unknown
407+ """
408+ from Orange .data .pandas_compat import table_from_frame
409+
410+ # s1 contains nan and s2 contains pd.NA
411+ df = pd .DataFrame (
412+ [["a" , "b" ], ["c" , "d" ], ["e" , "f" ], [5 , "c" ], [np .nan , np .nan ]],
413+ columns = ["s1" , "s2" ],
414+ ).astype ({"s1" : "object" , "s2" : "string" })
415+ table = table_from_frame (df )
416+ np .testing .assert_array_equal (np .empty ((5 , 0 )), table .X )
417+ np .testing .assert_array_equal (
418+ np .array (
419+ [
420+ ["a" , "b" ],
421+ ["c" , "d" ],
422+ ["e" , "f" ],
423+ ["5" , "c" ],
424+ [StringVariable .Unknown , StringVariable .Unknown ],
425+ ]
426+ ),
427+ table .metas ,
428+ )
429+ self .assertTrue (all (isinstance (v , StringVariable ) for v in table .domain .metas ))
430+
405431 def test_time_variable_compatible (self ):
406432 from Orange .data .pandas_compat import table_from_frame
407433
0 commit comments