
Fix compatibility with xgboost 3.0 #316

@WeichenXu123

Description


xgboost_ray is incompatible with xgboost 3.0: distributed training fails with a TypeError while xgboost_ray starts the Rabit tracker (full traceback below).

Reproducing code:

from xgboost_ray import RayDMatrix, RayParams, train, predict
from sklearn.datasets import load_breast_cancer
import xgboost as xgb

# Build a distributed data matrix from the breast cancer dataset.
train_x, train_y = load_breast_cancer(return_X_y=True)
train_set = RayDMatrix(train_x, train_y)

# Distributed training across two Ray actors; this is the call that fails
# on xgboost 3.0 (see the traceback below).
evals_result = {}
bst = train(
    {
        "objective": "binary:logistic",
        "eval_metric": ["logloss", "error"],
    },
    train_set,
    evals_result=evals_result,
    evals=[(train_set, "train")],
    verbose_eval=False,
    ray_params=RayParams(num_actors=2, cpus_per_actor=1))

bst.save_model("model.xgb")
print("Final training error: {:.4f}".format(
    evals_result["train"]["error"][-1]))

# Reload the saved model and run distributed prediction.
data, labels = load_breast_cancer(return_X_y=True)
dpred = RayDMatrix(data, labels)

bst = xgb.Booster(model_file="model.xgb")
pred_ray = predict(bst, dpred, ray_params=RayParams(num_actors=2))

print(pred_ray[:5])

Error:

      6 train_set = RayDMatrix(train_x, train_y)
      8 evals_result = {}
----> 9 bst = train(
     10     {
     11         "objective": "binary:logistic",
     12         "eval_metric": ["logloss", "error"],
     13     },
     14     train_set,
     15     evals_result=evals_result,
     16     evals=[(train_set, "train")],
     17     verbose_eval=False,
     18     ray_params=RayParams(num_actors=2, cpus_per_actor=1))
     20 bst.save_model("model.xgb")
     21 print("Final training error: {:.4f}".format(
     22     evals_result["train"]["error"][-1]))

File /databricks/python/lib/python3.12/site-packages/xgboost_ray/main.py:1612, in train(...)
   1599 training_state = _TrainingState(
   1600     actors=actors,
   1601     queue=queue,
   ...
   1608     pending_actors=pending_actors,
   1609 )
   1611 try:
-> 1612     bst, train_evals_result, train_additional_results = _train(
   1613         params,
   1614         dtrain,
   1615         boost_rounds_left,
   1616         *args,
   1617         evals=evals,
   1618         ray_params=ray_params,
   1619         cpus_per_actor=cpus_per_actor,
   1620         gpus_per_actor=gpus_per_actor,
   1621         _training_state=training_state,
   1622         **kwargs,
   1623     )
   1624     if training_state.training_started_at > 0.0:
   1625         total_training_time += time.time() - training_state.training_started_at

File /databricks/python/lib/python3.12/site-packages/xgboost_ray/main.py:1194, in _train(...)
   1191 maybe_log("[RayXGBoost] Starting XGBoost training.")
   1193 # Start Rabit tracker for gradient sharing
-> 1194 rabit_process, rabit_args = _start_rabit_tracker(alive_actors)
   1196 # Load checkpoint if we have one. In that case we need to adjust the
   1197 # number of training rounds.
   1198 if _training_state.checkpoint.value:

File /databricks/python/lib/python3.12/site-packages/xgboost_ray/main.py:261, in _start_rabit_tracker(...)
    257 host = get_node_ip_address()
    259 env = {"DMLC_NUM_WORKER": num_workers}
--> 261 rabit_tracker = _RabitTracker(host, num_workers)

File /databricks/python/lib/python3.12/site-packages/xgboost/core.py:729, in require_keyword_args.<locals>.throw_if.<locals>.inner_f(...)
--> 729 return func(**kwargs)

File /databricks/python/lib/python3.12/site-packages/xgboost/tracker.py:98, in RabitTracker.__init__(...)
     96     raise ValueError("Expecting either 'host' or 'task' for sortby.")
     97 if host_ip is not None:
---> 98     get_family(host_ip)  # use python socket to stop early for invalid address
     99 args = make_jcargs(
    100     host=host_ip,
    101     n_workers=n_workers,
    ...
    105     timeout=int(timeout),
    106 )
    107 _check_call(_LIB.XGTrackerCreate(args, ctypes.byref(handle)))

File /databricks/python/lib/python3.12/site-packages/xgboost/tracker.py:14, in get_family(addr)
---> 14 return socket.getaddrinfo(addr, None)[0][0]

File /usr/lib/python3.12/socket.py:963, in getaddrinfo(host, port, family, type, proto, flags)
--> 963 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):

TypeError: getaddrinfo() argument 1 must be string or None
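
The TypeError points at xgboost_ray's _start_rabit_tracker, which constructs the tracker positionally as _RabitTracker(host, num_workers). In xgboost 3.0 the tracker parameters visible in the traceback are n_workers and host_ip, so the positional call ends up binding the integer worker count to host_ip and socket.getaddrinfo receives an int. Below is a minimal, hypothetical sketch of a version-guarded construction; _make_rabit_tracker is not an existing xgboost_ray function, the keyword names are taken from the traceback, and the exact release that changed the signature is an assumption to verify against xgboost's changelog:

# Hypothetical compatibility shim for xgboost_ray/main.py::_start_rabit_tracker.
# Assumption: newer trackers accept n_workers/host_ip keywords (as the
# traceback suggests); the ">= 3" cutoff may need to move to an earlier
# release depending on when the signature actually changed.
import xgboost as xgb
from xgboost.tracker import RabitTracker as _RabitTracker

_XGB_MAJOR = int(xgb.__version__.split(".")[0])

def _make_rabit_tracker(host: str, num_workers: int) -> _RabitTracker:
    if _XGB_MAJOR >= 3:
        # Keyword arguments make the parameter order irrelevant; the old
        # positional call binds num_workers (an int) to host_ip, which is why
        # socket.getaddrinfo() raises the TypeError above.
        return _RabitTracker(n_workers=num_workers, host_ip=host)
    # Older xgboost releases accept the original (host, num_workers) order.
    return _RabitTracker(host, num_workers)

Until the library-side call is fixed, pinning xgboost<3.0 in the environment should sidestep the failure.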
