Skip to content

Commit dfac767

Browse files
committed
suggestions.
1 parent e22f510 commit dfac767

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

python-package/xgboost/_data_utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -311,10 +311,14 @@ def get_n_bytes(typ: Type) -> int:
311311
return off_len * (np.iinfo(typ).bits // 8)
312312

313313
if offset.size == get_n_bytes(np.int64):
314+
if not isinstance(cats, pa.LargeStringArray):
315+
raise TypeError(
316+
"Expecting `pyarrow.StringArray` or `pyarrow.LargeStringArray`,"
317+
f" got: {type(cats)}."
318+
)
314319
# Convert to 32-bit integers; Arrow recommends against the use of i64. Also,
315320
# XGBoost cannot handle a large number of categories (> 2**31).
316-
assert isinstance(cats, pa.LargeStringArray), type(cats)
317-
i32cats = pa.Array.from_pandas(cats.to_numpy(zero_copy_only=False))
321+
i32cats = cats.cast(pa.string())
318322
mask, offset, data = i32cats.buffers()
319323

320324
if offset.size != get_n_bytes(np.int32):

0 commit comments

Comments
 (0)