Skip to content

Commit

Permalink
Fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
kasyanovse committed Jul 26, 2023
1 parent e81f856 commit 9584b5e
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 32 deletions.
5 changes: 1 addition & 4 deletions fedot/api/api_utils/api_params_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,9 @@ def default_params_for_task(task_type: TaskTypesEnum) -> dict:
""" Returns a dict with default parameters"""
if task_type in [TaskTypesEnum.classification, TaskTypesEnum.regression]:
cv_folds = 5
validation_blocks = None

elif task_type == TaskTypesEnum.ts_forecasting:
cv_folds = 3
validation_blocks = None

# Dict with allowed keyword attributes for Api and their default values. If None - default value set
# in dataclasses ``PipelineComposerRequirements``, ``GPAlgorithmParameters``, ``GraphGenerationParams``
Expand All @@ -53,7 +51,7 @@ def default_params_for_task(task_type: TaskTypesEnum) -> dict:
keep_n_best=1,
available_operations=None,
metric=None,
validation_blocks=validation_blocks,
validation_blocks=None,
cv_folds=cv_folds,
genetic_scheme=None,
early_stopping_iterations=None,
Expand Down Expand Up @@ -81,7 +79,6 @@ def check_and_set_default_params(self, params: dict) -> dict:
invalid_keys = params.keys() - allowed_keys
if invalid_keys:
raise KeyError(f"Invalid key parameters {invalid_keys}")

else:
missing_params = self.default_params.keys() - params.keys()
for k in missing_params:
Expand Down
25 changes: 15 additions & 10 deletions fedot/api/api_utils/input_analyser.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def give_recommendations(self, input_data: Union[InputData, MultiModalData], inp
"""

if input_params is None:
input_params = {}
input_params = dict()

recommendations_for_data = dict()
recommendations_for_params = dict()
Expand All @@ -55,15 +55,20 @@ def give_recommendations(self, input_data: Union[InputData, MultiModalData], inp
recommendations_for_data[data_source_name], recommendations_for_params[data_source_name] = \
self.give_recommendations(input_data[data_source_name],
input_params=input_params)
elif isinstance(input_data, InputData) and input_data.data_type in [DataTypesEnum.table, DataTypesEnum.text]:
recommendations_for_data = self._give_recommendations_for_data(input_data=input_data)

recommendations_for_params = dict()
if 'use_meta_rules' in input_params.keys() and input_params['use_meta_rules']:
recommendations_for_params = self._give_recommendations_with_meta_rules(input_data=input_data,
input_params=input_params)
if 'label_encoded' in recommendations_for_data.keys():
recommendations_for_params['label_encoded'] = recommendations_for_data['label_encoded']
elif isinstance(input_data, InputData):
if input_data.data_type in [DataTypesEnum.table, DataTypesEnum.text]:
recommendations_for_data = self._give_recommendations_for_data(input_data=input_data)
if 'use_meta_rules' in input_params and input_params['use_meta_rules']:
recommendations_for_params = self._give_recommendations_with_meta_rules(input_data=input_data,
input_params=input_params)
if 'label_encoded' in recommendations_for_data:
recommendations_for_params['label_encoded'] = recommendations_for_data['label_encoded']
elif input_data.data_type is DataTypesEnum.ts:
if input_params.get('validation_blocks') is None:
cv_folds = input_params.get('cv_folds') or 1
test_size = input_data.target.shape[0] / (cv_folds + 1)
val_blocks = test_size // input_data.task.task_params.forecast_length
recommendations_for_params['validation_blocks'] = int(val_blocks)

return recommendations_for_data, recommendations_for_params

Expand Down
4 changes: 1 addition & 3 deletions fedot/api/api_utils/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,7 @@ def accept_and_apply_recommendations(self, input_data: Union[InputData, MultiMod
self.change_preset_for_label_encoded_data(input_data.task, input_data.data_type)

# update api params with recommendations obtained using meta rules
for key in self.data.keys():
if key not in recommendations:
continue
for key in recommendations:
self.update({key: recommendations[key]})

def change_preset_for_label_encoded_data(self, task: Task, data_type: DataTypesEnum):
Expand Down
6 changes: 3 additions & 3 deletions fedot/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ class Fedot:
- ``5`` -> for classification and regression tasks
- ``3`` -> for time series forecasting task
validation_blocks (int): number of validation blocks for time series forecasting. Default value is ``2``.
validation_blocks (int): number of validation blocks for time series forecasting. Default value is ``None``.
show_progress (bool): indicates whether to show progress using tqdm/tuner or not. Defaults to ``True``.
Expand All @@ -129,7 +129,7 @@ class Fedot:
genetic_scheme (str): name of the genetic scheme. Defaults to ``steady_state``.
with_tuning (bool): flag for tuning hyperparameters of the final evolved :class:`Pipeline`.
Defaults to ``True``.
Defaults to ``False``.
preset (str): name of the preset for model building (e.g. ``'best_quality'``, ``'fast_train'``, ``'gpu'``).
Default value is ``'auto'``.
Expand All @@ -147,7 +147,7 @@ class Fedot:
- ``'automl'`` -> A special preset with only AutoML libraries such as TPOT and H2O as operations
use_input_preprocessing (bool): indicates whether to do preprocessing of further given data.
Defaults to ``True``.
Defaults to ``True``. If set to ``False``, other settings whose default value is ``None`` may cause problems.
use_meta_rules (bool): indicates whether to change set params according to FEDOT meta rules.
use_pipelines_cache (bool): indicates whether to use pipeline structures caching. Defaults to ``True``.
use_preprocessing_cache (bool): bool indicating whether to use optional preprocessors caching.
Expand Down
22 changes: 10 additions & 12 deletions fedot/core/optimisers/objective/data_source_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ class DataSourceSplitter:
:param cv_folds: Number of folds on data for cross-validation.
If provided, then k-fold validation is used. Otherwise, hold-out validation is used.
:param validation_blocks: Number of validation blocks for time series forecasting.
:param split_ratio: Ratio of data for splitting. Applied only in case of hold-out split.
:param split_ratio: Ratio of data for splitting.
    Applied only in case of hold-out split; not applicable to time series data.
If not provided, then default split ratios will be used.
:param shuffle: Is shuffling required for data.
"""
Expand All @@ -33,6 +34,10 @@ def __init__(self,
validation_blocks: Optional[int] = None,
split_ratio: Optional[float] = None,
shuffle: bool = False):

if validation_blocks is None:
raise ValueError('validation_blocks parameter is not defined')

self.cv_folds = cv_folds
self.validation_blocks = validation_blocks
self.split_ratio = split_ratio
Expand All @@ -41,6 +46,10 @@ def __init__(self,
self.log = default_log(self)

def build(self, data: InputData) -> DataSource:
# check that validation blocks is defined for timeseries
if self.validation_blocks is None and data.task.task_type is TaskTypesEnum.ts_forecasting:
raise ValueError('validation_blocks parameter is not defined')

# Shuffle data
if self.shuffle and data.task.task_type is not TaskTypesEnum.ts_forecasting:
data.shuffle()
Expand All @@ -66,10 +75,6 @@ def _build_holdout_producer(self, data: InputData) -> DataSource:
"""

split_ratio = self.split_ratio or default_data_split_ratio_by_task[data.task.task_type]
if data.task.task_type is TaskTypesEnum.ts_forecasting:
if self.validation_blocks is None:
self.validation_blocks = np.floor(data.target.shape[0] * split_ratio /
data.task.task_params.forecast_length)
train_data, test_data = train_test_data_setup(data, split_ratio, validation_blocks=self.validation_blocks)

if RemoteEvaluator().is_enabled:
Expand All @@ -82,13 +87,6 @@ def _build_kfolds_producer(self, data: InputData) -> DataSource:
raise NotImplementedError('Cross-validation is not supported for multi-modal data')
if data.task.task_type is TaskTypesEnum.ts_forecasting:
# Perform time series cross validation
if self.validation_blocks is None:
split_ratio = self.split_ratio or default_data_split_ratio_by_task[data.task.task_type]
default_validation_blocks = np.floor(
data.target.shape[0] * split_ratio / data.task.task_params.forecast_length)
self.validation_blocks = default_validation_blocks
self.log.info('For timeseries cross validation validation_blocks number was changed ' +
f'from None to {default_validation_blocks} blocks')
cv_generator = partial(ts_cv_generator, data,
self.cv_folds,
self.validation_blocks,
Expand Down

0 comments on commit 9584b5e

Please sign in to comment.