Hello, and thanks for this project! It seems very promising!
I am trying to train an XGBoost classifier. My code is:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import StratifiedKFold
from scipy.stats import uniform
from mango.domain.distribution import loguniform
from mango import Tuner
from joblib import Parallel, delayed
xgb_params = {
    'n_estimators': range(10, 200, 50),        # 10 to 200 in steps of 50
    'max_depth': range(1, 15),                 # 1 to 14
    'reg_alpha': loguniform(-3, 6),            # 10^-3 to 10^3
    'booster': ['gbtree', 'gblinear'],
    'colsample_bylevel': uniform(0.05, 0.95),  # 0.05 to 1.0
    'colsample_bytree': uniform(0.05, 0.95),   # 0.05 to 1.0
    'learning_rate': loguniform(-3, 3),        # 0.001 to 1
    'reg_lambda': loguniform(-3, 6),           # 10^-3 to 10^3
    'min_child_weight': loguniform(0, 2),      # 1 to 100
    'subsample': uniform(0.1, 0.89),           # 0.1 to 0.99
}
class MangoParallelOptimization:
    def __init__(self,
                 njobs,
                 configuration_params,
                 features_train,
                 target_train,
                 features_val,
                 target_val):
        self.njobs = njobs
        self.conf_dict = configuration_params
        self.x_train = features_train
        self.y_train = target_train
        self.x_val = features_val
        self.y_val = target_val
        self.space = xgb_params

    def _objective(self, **model_params):
        kfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=123)
        results = []
        for hyper_param in model_params:
            model = xgb.XGBClassifier(**hyper_param)
            result = cross_val_score(model,
                                     self.x_train,
                                     self.y_train,
                                     scoring='accuracy',
                                     cv=kfold).mean()
            results.append(result)
        return results

    def _objective2(self, params_batch):
        global parameters
        results_batch = Parallel(self.njobs,
                                 backend='multiprocessing')
        (delayed(self._objective)(**params) for params in params_batch)
        acc = [result for result in results_batch]
        return acc

    def mango_optimization(self):
        tuner = Tuner(self.space, self._objective2, self.conf_dict)
        optimization_results = tuner.maximize()
        return optimization_results['best_params'], optimization_results['best_objective']
if __name__ == "__main__":
    df = pd.read_csv('/home/ggous/example.csv')
    df.dropna(axis=1, inplace=True)
    features = df.drop(['id', 'CLASS'], axis=1)
    labels = df['CLASS'].values

    # Split the data into training and testing sets
    x_train, x_test, y_train, y_test = train_test_split(
        features,
        labels,
        stratify=labels,
        test_size=0.2,
        random_state=123)

    # Encode string class values as integers
    label_encoder = LabelEncoder()
    y_train = label_encoder.fit_transform(y_train)
    y_test = label_encoder.transform(y_test)

    scaler = StandardScaler()
    x_train_sc = pd.DataFrame(scaler.fit_transform(x_train),
                              index=x_train.index,
                              columns=x_train.columns)
    x_test_sc = scaler.transform(x_test)

    # Parallel optimization with Mango
    config_params = {'num_iteration': 40, 'initial_random': 10}
    optim = MangoParallelOptimization(njobs=4,
                                      configuration_params=config_params,
                                      features_train=x_train,
                                      target_train=y_train,
                                      features_val=x_test,
                                      target_val=y_test)
    best_parameters, best_objective = optim.mango_optimization()

    # Results
    print('best parameters:', best_parameters)
    print('best accuracy:', best_objective)

    # Train the model with the best hyper-parameters
    best_model = xgb.XGBClassifier(n_jobs=-1, **best_parameters)
    best_model.fit(x_train, y_train)
The file I am using is here.
I have some questions:

- First of all, running the code gives:

  Parallel object is not iterable
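From the error, I suspect the Parallel(...) call and the delayed(...) generator ended up as two separate statements, so results_batch is the Parallel object itself rather than a list of scores. Here is a minimal sketch of what I think the fix looks like, with _objective reduced to scoring a single configuration since _objective2 already handles the batch:

def _objective(self, **model_params):
    # Score a single hyper-parameter configuration.
    kfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=123)
    model = xgb.XGBClassifier(**model_params)
    return cross_val_score(model,
                           self.x_train,
                           self.y_train,
                           scoring='accuracy',
                           cv=kfold).mean()

def _objective2(self, params_batch):
    # The generator must be passed to the Parallel object in the same
    # expression; split over two statements, the jobs never run and
    # results_batch is the (non-iterable) Parallel object itself.
    results_batch = Parallel(self.njobs, backend='multiprocessing')(
        delayed(self._objective)(**params) for params in params_batch)
    return list(results_batch)

Is that the right way to structure the batch objective?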
- If I want to pass the following fixed arguments to the XGBClassifier:

  'use_label_encoder': False,
  'eval_metric': 'mlogloss',
  'seed': 123,
  'enable_categorical': False

  can I do this?

for hyper_param in model_params:
    model = xgb.XGBClassifier(**hyper_param,
                              use_label_encoder=False,
                              eval_metric='mlogloss',
                              seed=123,
                              enable_categorical=False)
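For what it's worth, mixing explicit keyword arguments with **-unpacking like that should work as long as none of the fixed names also appear in the sampled dictionary. An alternative I am considering is merging the fixed settings into the sampled parameters first (fixed_params is just a name I made up here):

fixed_params = {'use_label_encoder': False,
                'eval_metric': 'mlogloss',
                'seed': 123,
                'enable_categorical': False}
# Later entries win on key collisions, so sampled values take priority.
model = xgb.XGBClassifier(**{**fixed_params, **hyper_param})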
- If I want to do the k-fold manually, like this:

def _objective(self, **model_params):
    kfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=123)
    for i, (train_idx, val_idx) in enumerate(kfold.split(x_train, y_train)):
        x_train_, y_train_ = x_train[train_idx, :], y_train[train_idx]
        x_val_, y_val_ = x_train[val_idx, :], y_train[val_idx]
        model = xgb.XGBClassifier(**hyper_param)
        history = model.fit(x_train_,
                            y_train_,
                            early_stopping_rounds=10,
                            eval_set=[(x_train_, y_train_), (x_val_, y_val_)])
        ....
How can I do that? And how can I use the history object inside every fold iteration in order to plot things, and then return the result that Mango expects? What kind of result should that be?
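My current understanding (please correct me if this is wrong) is that the batch objective receives a list of hyper-parameter dicts and must return a list of scores of the same length and order. Under that assumption, here is a sketch of a manual k-fold version; note that the sklearn-style fit() returns the fitted model itself rather than a Keras-like history object, so the per-iteration curves would come from model.evals_result():

def _objective_manual(self, params_batch):
    # Return one score per sampled configuration, in the same order,
    # which (as I understand it) is what Mango's Tuner consumes.
    scores = []
    kfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=123)
    for params in params_batch:
        fold_scores = []
        for i, (train_idx, val_idx) in enumerate(kfold.split(self.x_train, self.y_train)):
            x_tr, y_tr = self.x_train.iloc[train_idx], self.y_train[train_idx]
            x_va, y_va = self.x_train.iloc[val_idx], self.y_train[val_idx]
            model = xgb.XGBClassifier(**params)
            # Depending on the xgboost version, early_stopping_rounds is
            # passed either here in fit() or to the constructor above.
            model.fit(x_tr, y_tr,
                      eval_set=[(x_tr, y_tr), (x_va, y_va)],
                      verbose=False)
            # Per-iteration metrics for plotting, e.g.
            # evals['validation_1']['mlogloss'] for the validation fold.
            evals = model.evals_result()
            fold_scores.append(model.score(x_va, y_va))  # fold accuracy
        scores.append(np.mean(fold_scores))
    return scores

Does that match what the Tuner expects?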