diff --git a/fedot/api/api_utils/api_params_repository.py b/fedot/api/api_utils/api_params_repository.py index 2675e594ce..22c6e1ba83 100644 --- a/fedot/api/api_utils/api_params_repository.py +++ b/fedot/api/api_utils/api_params_repository.py @@ -34,11 +34,9 @@ def default_params_for_task(task_type: TaskTypesEnum) -> dict: """ Returns a dict with default parameters""" if task_type in [TaskTypesEnum.classification, TaskTypesEnum.regression]: cv_folds = 5 - validation_blocks = None elif task_type == TaskTypesEnum.ts_forecasting: cv_folds = 3 - validation_blocks = None # Dict with allowed keyword attributes for Api and their default values. If None - default value set # in dataclasses ``PipelineComposerRequirements``, ``GPAlgorithmParameters``, ``GraphGenerationParams`` @@ -53,7 +51,7 @@ def default_params_for_task(task_type: TaskTypesEnum) -> dict: keep_n_best=1, available_operations=None, metric=None, - validation_blocks=validation_blocks, + validation_blocks=None, cv_folds=cv_folds, genetic_scheme=None, early_stopping_iterations=None, @@ -81,7 +79,6 @@ def check_and_set_default_params(self, params: dict) -> dict: invalid_keys = params.keys() - allowed_keys if invalid_keys: raise KeyError(f"Invalid key parameters {invalid_keys}") - else: missing_params = self.default_params.keys() - params.keys() for k in missing_params: diff --git a/fedot/api/api_utils/input_analyser.py b/fedot/api/api_utils/input_analyser.py index 6470202e3d..2dda08b5a4 100644 --- a/fedot/api/api_utils/input_analyser.py +++ b/fedot/api/api_utils/input_analyser.py @@ -45,7 +45,7 @@ def give_recommendations(self, input_data: Union[InputData, MultiModalData], inp """ if input_params is None: - input_params = {} + input_params = dict() recommendations_for_data = dict() recommendations_for_params = dict() @@ -55,15 +55,20 @@ def give_recommendations(self, input_data: Union[InputData, MultiModalData], inp recommendations_for_data[data_source_name], recommendations_for_params[data_source_name] = \ self.give_recommendations(input_data[data_source_name], input_params=input_params) - elif isinstance(input_data, InputData) and input_data.data_type in [DataTypesEnum.table, DataTypesEnum.text]: - recommendations_for_data = self._give_recommendations_for_data(input_data=input_data) - - recommendations_for_params = dict() - if 'use_meta_rules' in input_params.keys() and input_params['use_meta_rules']: - recommendations_for_params = self._give_recommendations_with_meta_rules(input_data=input_data, - input_params=input_params) - if 'label_encoded' in recommendations_for_data.keys(): - recommendations_for_params['label_encoded'] = recommendations_for_data['label_encoded'] + elif isinstance(input_data, InputData): + if input_data.data_type in [DataTypesEnum.table, DataTypesEnum.text]: + recommendations_for_data = self._give_recommendations_for_data(input_data=input_data) + if 'use_meta_rules' in input_params and input_params['use_meta_rules']: + recommendations_for_params = self._give_recommendations_with_meta_rules(input_data=input_data, + input_params=input_params) + if 'label_encoded' in recommendations_for_data: + recommendations_for_params['label_encoded'] = recommendations_for_data['label_encoded'] + elif input_data.data_type is DataTypesEnum.ts: + if input_params.get('validation_blocks') is None: + cv_folds = input_params.get('cv_folds') or 1 + test_size = input_data.target.shape[0] / (cv_folds + 1) + val_blocks = test_size // input_data.task.task_params.forecast_length + recommendations_for_params['validation_blocks'] = int(val_blocks) return recommendations_for_data, recommendations_for_params diff --git a/fedot/api/api_utils/params.py b/fedot/api/api_utils/params.py index cbd6bcf7c0..718fabe28d 100644 --- a/fedot/api/api_utils/params.py +++ b/fedot/api/api_utils/params.py @@ -68,9 +68,7 @@ def accept_and_apply_recommendations(self, input_data: Union[InputData, MultiMod self.change_preset_for_label_encoded_data(input_data.task, input_data.data_type) # update api params with recommendations obtained using meta rules - for key in self.data.keys(): - if key not in recommendations: - continue + for key in recommendations: self.update({key: recommendations[key]}) def change_preset_for_label_encoded_data(self, task: Task, data_type: DataTypesEnum): diff --git a/fedot/api/main.py b/fedot/api/main.py index a48f8813d8..8a62ec74b6 100644 --- a/fedot/api/main.py +++ b/fedot/api/main.py @@ -110,7 +110,7 @@ class Fedot: - ``5`` -> for classification and regression tasks - ``3`` -> for time series forecasting task - validation_blocks (int): number of validation blocks for time series forecasting. Default value is ``2``. + validation_blocks (int): number of validation blocks for time series forecasting. Default value is ``None``. show_progress (bool): indicates whether to show progress using tqdm/tuner or not. Defaults to ``True``. @@ -129,7 +129,7 @@ class Fedot: genetic_scheme (str): name of the genetic scheme. Defaults to ``steady_state``. with_tuning (bool): flag for tuning hyperparameters of the final evolved :class:`Pipeline`. - Defaults to ``True``. + Defaults to ``False``. preset (str): name of the preset for model building (e.g. ``'best_quality'``, ``'fast_train'``, ``'gpu'``). Default value is ``'auto'``. @@ -147,7 +147,7 @@ class Fedot: - ``'automl'`` -> A special preset with only AutoML libraries such as TPOT and H2O as operations use_input_preprocessing (bool): indicates whether to do preprocessing of further given data. - Defaults to ``True``. + Defaults to ``True``. If it is False, there may be problems with other settings with default ``None``. use_meta_rules (bool): indicates whether to change set params according to FEDOT meta rules. use_pipelines_cache (bool): indicates whether to use pipeline structures caching. Defaults to ``True``. use_preprocessing_cache (bool): bool indicating whether to use optional preprocessors caching. diff --git a/fedot/core/optimisers/objective/data_source_splitter.py b/fedot/core/optimisers/objective/data_source_splitter.py index d40fff1c74..fc3e92b835 100644 --- a/fedot/core/optimisers/objective/data_source_splitter.py +++ b/fedot/core/optimisers/objective/data_source_splitter.py @@ -23,7 +23,8 @@ class DataSourceSplitter: :param cv_folds: Number of folds on data for cross-validation. If provided, then k-fold validation is used. Otherwise, hold-out validation is used. :param validation_blocks: Number of validation blocks for time series forecasting. - :param split_ratio: Ratio of data for splitting. Applied only in case of hold-out split. + :param split_ratio: Ratio of data for splitting. + Applied only in case of hold-out split. Not for timeseries data. If not provided, then default split ratios will be used. :param shuffle: Is shuffling required for data. """ @@ -33,6 +34,10 @@ def __init__(self, validation_blocks: Optional[int] = None, split_ratio: Optional[float] = None, shuffle: bool = False): + + if validation_blocks is None: + raise ValueError('validation_blocks parameter is not defined') + self.cv_folds = cv_folds self.validation_blocks = validation_blocks self.split_ratio = split_ratio @@ -41,6 +46,10 @@ def __init__(self, self.log = default_log(self) def build(self, data: InputData) -> DataSource: + # check that validation blocks is defined for timeseries + if self.validation_blocks is None and data.task.task_type is TaskTypesEnum.ts_forecasting: + raise ValueError('validation_blocks parameter is not defined') + # Shuffle data if self.shuffle and data.task.task_type is not TaskTypesEnum.ts_forecasting: data.shuffle() @@ -66,10 +75,6 @@ def _build_holdout_producer(self, data: InputData) -> DataSource: """ split_ratio = self.split_ratio or default_data_split_ratio_by_task[data.task.task_type] - if data.task.task_type is TaskTypesEnum.ts_forecasting: - if self.validation_blocks is None: - self.validation_blocks = np.floor(data.target.shape[0] * split_ratio / - data.task.task_params.forecast_length) train_data, test_data = train_test_data_setup(data, split_ratio, validation_blocks=self.validation_blocks) if RemoteEvaluator().is_enabled: @@ -82,13 +87,6 @@ def _build_kfolds_producer(self, data: InputData) -> DataSource: raise NotImplementedError('Cross-validation is not supported for multi-modal data') if data.task.task_type is TaskTypesEnum.ts_forecasting: # Perform time series cross validation - if self.validation_blocks is None: - split_ratio = self.split_ratio or default_data_split_ratio_by_task[data.task.task_type] - default_validation_blocks = np.floor( - data.target.shape[0] * split_ratio / data.task.task_params.forecast_length) - self.validation_blocks = default_validation_blocks - self.log.info('For timeseries cross validation validation_blocks number was changed ' + - f'from None to {default_validation_blocks} blocks') cv_generator = partial(ts_cv_generator, data, self.cv_folds, self.validation_blocks,