'Parallel' object is not iterable #73

@ggous

Description

Hello and thanks for this project! It seems very promising!

I am trying to train an XGBoost classifier.

My code is:


import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import StratifiedKFold
from scipy.stats import uniform
from mango.domain.distribution import loguniform
from mango import Tuner
from joblib import Parallel, delayed

xgb_params = {
    'n_estimators': range(10, 200, 50), # 10 to 200 in steps of 50
    'max_depth': range(1, 15), # 1 to 14
    'reg_alpha': loguniform(-3, 6),  # 10^-3 to 10^3
    'booster': ['gbtree', 'gblinear'],
    'colsample_bylevel': uniform(0.05, 0.95), # 0.05 to 1.0
    'colsample_bytree': uniform(0.05, 0.95), # 0.05 to 1.0
    'learning_rate': loguniform(-3, 3),  # 0.001 to 1
    'reg_lambda': loguniform(-3, 6),  # 10^-3 to 10^3
    'min_child_weight': loguniform(0, 2), # 1 to 100
    'subsample': uniform(0.1, 0.89), # 0.1 to 0.99
}
 
class MangoParallelOptimization:
    def __init__(self,
                 njobs, 
                 configuration_params,
                 features_train,
                 target_train,
                 features_val,
                 target_val):
        self.njobs = njobs
        self.conf_dict = configuration_params
        self.x_train = features_train
        self.y_train = target_train
        self.x_val = features_val
        self.y_val = target_val
        self.space = xgb_params
        
        
    def _objective(self, **model_params):
        kfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=123)
        results = []
        for hyper_param in model_params:
            model = xgb.XGBClassifier(**hyper_param)
                                      
            result = cross_val_score(model, 
                                     self.x_train,
                                     self.y_train,
                                     scoring='accuracy',
                                     cv=kfold).mean()
            results.append(result)
        return results
        
           
    def _objective2(self, params_batch):
        global parameters
        results_batch = Parallel(self.njobs,
                                 backend='multiprocessing')
        (delayed(self._objective)(**params) for params in params_batch)
        acc = [result for result in results_batch]
        return acc
    
    def mango_optimization(self):
        tuner = Tuner(self.space, self._objective2, self.conf_dict)
        optimization_results = tuner.maximize()
        return optimization_results['best_params'], optimization_results['best_objective']
    
    
    
if __name__=="__main__":
    df = pd.read_csv('/home/ggous/example.csv')
    df.dropna(axis=1, inplace=True)
    features = df.drop(['id', 'CLASS'], axis=1)
    labels = df['CLASS'].values
    
    # Split the data into training and testing sets
    x_train, x_test, y_train, y_test = train_test_split(
        features,
        labels,
        stratify=labels,
        test_size = 0.2,
        random_state = 123)
    
    # encode string class values as integers
    label_encoder = LabelEncoder()
    y_train = label_encoder.fit_transform(y_train)
    y_test = label_encoder.transform(y_test)

    scaler = StandardScaler()
    x_train_sc = pd.DataFrame(scaler.fit_transform(x_train),
                              index=x_train.index, 
                              columns=x_train.columns)
    x_test_sc = scaler.transform(x_test)

    # Parallel optimization with Mango
    config_params = {'num_iteration': 40, 'initial_random': 10}
    optim = MangoParallelOptimization(njobs=4,
                                      configuration_params=config_params,
                                      features_train=x_train,
                                      target_train=y_train,
                                      features_val=x_test,
                                      target_val=y_test)
     
    best_parameters, best_objective = optim.mango_optimization()

    # Results
    print('best parameters:', best_parameters)
    print('best accuracy:', best_objective)
    # Train the model with the best hyper-parameters 
    best_model = xgb.XGBClassifier(n_jobs=-1, **best_parameters)
    best_model.fit(x_train, y_train)

The file I am using is here.

I have some questions:

  1. First of all, running the code gives: 'Parallel' object is not iterable (see the sketch right below this item).
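
I am wondering if the problem is that the generator expression in _objective2 sits on its own line and is never passed to the Parallel(...) call, so results_batch ends up being the Parallel object itself. Here is a minimal sketch of what I think the two methods should look like (with _objective scoring a single parameter dict, since joblib already spreads the batch across workers) -- is this roughly the intended usage?

    def _objective(self, **model_params):
        # score one sampled configuration with 2-fold cross-validation
        kfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=123)
        model = xgb.XGBClassifier(**model_params)
        return cross_val_score(model,
                               self.x_train,
                               self.y_train,
                               scoring='accuracy',
                               cv=kfold).mean()

    def _objective2(self, params_batch):
        # the generator expression has to be passed to the Parallel(...) call itself
        results_batch = Parallel(n_jobs=self.njobs, backend='multiprocessing')(
            delayed(self._objective)(**params) for params in params_batch
        )
        return list(results_batch)
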

  2. If I want to pass the following extra arguments to the XGBClassifier:

'use_label_encoder': False,
'eval_metric': 'mlogloss',
'seed': 123,
'enable_categorical': False

can I do this?

for hyper_param in model_params:
    model = xgb.XGBClassifier(**hyper_param,
                              use_label_encoder=False,
                              eval_metric='mlogloss',
                              seed=123,
                              enable_categorical=False)
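
Or would it be cleaner to keep these fixed settings in a separate dict, outside the search space, and merge them in? A small sketch of what I mean (the hyper_param dict here is just a hypothetical stand-in for one configuration sampled by mango, and I am assuming its keys do not overlap with the fixed ones):

import xgboost as xgb

# fixed settings that apply to every candidate model
fixed_params = {
    'use_label_encoder': False,
    'eval_metric': 'mlogloss',
    'seed': 123,
    'enable_categorical': False,
}

# stand-in for one sampled configuration
hyper_param = {'max_depth': 3, 'n_estimators': 60}

model = xgb.XGBClassifier(**hyper_param, **fixed_params)
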
  3. If I want to do the k-fold manually, like this:
def _objective(self, **model_params):
    kfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=123)
    for i, (train_idx, val_idx) in enumerate(kfold.split(x_train, y_train)):
        x_train_, y_train_ = x_train[train_idx, :], y_train[train_idx]
        x_val_, y_val_ = x_train[val_idx, :], y_train[val_idx]

        model = xgb.XGBClassifier(**hyper_param)

        history = model.fit(x_train_,
                            y_train_,
                            early_stopping_rounds=10,
                            eval_set=[(x_train_, y_train_), (x_val_, y_val_)])

        ....

How can I do that, and how can I use the history object inside every fold iteration in order to plot things?
And finally, what should I return so that mango gets the result it expects? What kind of result should it be?
A rough sketch of what I have in mind is below.
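
Please correct me if the return value is not what mango expects. I am assuming mango hands the objective a list of parameter dicts and wants back a list of scores in the same order, and I am reading the per-iteration metrics from evals_result(), since fit() returns the model itself rather than a history object (passing early_stopping_rounds to fit() is what my xgboost version still supports; newer versions move it to the constructor):

def _objective2(self, params_batch):
    scores = []
    kfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=123)
    for hyper_param in params_batch:
        fold_scores = []
        for i, (train_idx, val_idx) in enumerate(kfold.split(self.x_train, self.y_train)):
            x_train_ = self.x_train.iloc[train_idx]
            y_train_ = self.y_train[train_idx]
            x_val_ = self.x_train.iloc[val_idx]
            y_val_ = self.y_train[val_idx]

            model = xgb.XGBClassifier(use_label_encoder=False,
                                      eval_metric='mlogloss',
                                      seed=123,
                                      **hyper_param)
            model.fit(x_train_,
                      y_train_,
                      early_stopping_rounds=10,
                      eval_set=[(x_train_, y_train_), (x_val_, y_val_)],
                      verbose=False)

            # per-iteration train/validation metrics, usable for plotting
            history = model.evals_result()
            # e.g. plt.plot(history['validation_1']['mlogloss'])

            # accuracy on the held-out fold
            fold_scores.append(model.score(x_val_, y_val_))

        # one score per parameter dict, averaged over folds
        scores.append(float(np.mean(fold_scores)))
    return scores
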
