Skip to content

Commit 32a401c

Browse files
correct describe_null for arrow and numpy
1 parent 177758a commit 32a401c

File tree

2 files changed

+24
-7
lines changed

2 files changed

+24
-7
lines changed

packages/vaex-core/vaex/dataframe_protocol.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -422,8 +422,19 @@ def describe_null(self) -> Tuple[int, Any]:
422422
kind = self.dtype[0]
423423
value = None
424424
if kind in (_k.INT, _k.UINT, _k.FLOAT, _k.BOOL, _k.CATEGORICAL):
425-
null = 3
426-
value = 1
425+
if self._col.dtype.is_arrow:
426+
# arrow arrays always allow for null values
427+
# via a validity bitmask, where a 0 bit encodes a null/missing value
428+
null = 3
429+
value = 0
430+
elif self._col.is_masked:
431+
# masked arrays are always numpy.ma arrays (byte mask, where 1 marks a missing value)
432+
null = 4
433+
value = 1
434+
else:
435+
# otherwise we have a normal numpy array
436+
null = 0
437+
value = None
427438
else:
428439
raise NotImplementedError(f"Data type {self.dtype} not yet supported")
429440

tests/dataframe_protocol_test.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,10 @@ def test_mixed_intfloatbool(df_factory):
5757

5858
with pytest.raises(TypeError):
5959
assert df2.__dataframe__().get_column_by_name("y").describe_categorical
60-
assert df2.__dataframe__().get_column_by_name("y").describe_null == (3, 1)
60+
if df2['y'].dtype.is_arrow:
61+
assert df2.__dataframe__().get_column_by_name("y").describe_null == (3, 0)
62+
else:
63+
assert df2.__dataframe__().get_column_by_name("y").describe_null == (0, None)
6164

6265
assert_dataframe_equal(df.__dataframe__(), df)
6366

@@ -127,17 +130,17 @@ def test_categorical_ordinal():
127130
col = df.__dataframe__().get_column_by_name("colors")
128131
assert col.dtype[0] == _DtypeKind.CATEGORICAL
129132
assert col.describe_categorical == (False, True, {0: "red", 1: "green", 2: "blue"})
130-
assert col.describe_null == (3, 1)
133+
assert col.describe_null == (0, None)
131134
assert col.dtype == (23, 64, "u", "=")
132135
col2 = df.__dataframe__().get_column_by_name("year")
133136
assert col2.dtype[0] == _DtypeKind.CATEGORICAL
134137
assert col2.describe_categorical == (False, True, {0: 2012, 1: 2013, 2: 2014, 3: 2015, 4: 2016, 5: 2017, 6: 2018, 7: 2019})
135-
assert col2.describe_null == (3, 1)
138+
assert col2.describe_null == (0, None)
136139
assert col2.dtype == (23, 64, "u", "=")
137140
col3 = df.__dataframe__().get_column_by_name("weekday")
138141
assert col3.dtype[0] == _DtypeKind.CATEGORICAL
139142
assert col3.describe_categorical == (False, True, {0: "Mon", 1: "Tue", 2: "Wed", 3: "Thu", 4: "Fri", 5: "Sat", 6: "Sun"})
140-
assert col3.describe_null == (3, 1)
143+
assert col3.describe_null == (0, None)
141144
assert col3.dtype == (23, 64, "u", "=")
142145

143146
df2 = _from_dataframe_to_vaex(df.__dataframe__())
@@ -158,7 +161,10 @@ def test_arrow_dictionary():
158161
col = df.__dataframe__().get_column_by_name("x")
159162
assert col.dtype[0] == _DtypeKind.CATEGORICAL
160163
assert col.describe_categorical == (False, True, {0: "foo", 1: "bar", 2: "baz"})
161-
assert col.describe_null == (3, 1)
164+
if df['x'].dtype.is_arrow:
165+
assert col.describe_null == (3, 0)
166+
else:
167+
assert col.describe_null == (0, None)
162168
assert col.dtype == (23, 64, "u", "=")
163169

164170
df2 = _from_dataframe_to_vaex(df.__dataframe__())

0 commit comments

Comments
 (0)