From f97526bad8c73956542d8592bda4f19e01936778 Mon Sep 17 00:00:00 2001 From: valer1435 Date: Mon, 7 Aug 2023 14:34:22 +0300 Subject: [PATCH 1/8] fix pep8 --- fedot/core/pipelines/random_pipeline_factory.py | 2 +- fedot/core/pipelines/verification_rules.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fedot/core/pipelines/random_pipeline_factory.py b/fedot/core/pipelines/random_pipeline_factory.py index 1210af75c4..f964f0f54e 100644 --- a/fedot/core/pipelines/random_pipeline_factory.py +++ b/fedot/core/pipelines/random_pipeline_factory.py @@ -85,4 +85,4 @@ def graph_growth(graph: OptGraph, secondary_node = node_factory.get_node(is_primary=False) graph.add_node(secondary_node) node_parent.nodes_from.append(secondary_node) - graph_growth(graph, secondary_node, node_factory, requirements, max_depth) \ No newline at end of file + graph_growth(graph, secondary_node, node_factory, requirements, max_depth) diff --git a/fedot/core/pipelines/verification_rules.py b/fedot/core/pipelines/verification_rules.py index cfb02f480f..eebede3594 100644 --- a/fedot/core/pipelines/verification_rules.py +++ b/fedot/core/pipelines/verification_rules.py @@ -12,7 +12,7 @@ def has_correct_operations_for_task(pipeline: Pipeline, task_type: Optional[TaskTypesEnum] = None): - if task_type and not task_type in pipeline.root_node.operation.acceptable_task_types: + if task_type and task_type not in pipeline.root_node.operation.acceptable_task_types: raise ValueError(f'{ERROR_PREFIX} Pipeline has incorrect operations positions') return True From 124f327bfd2fee329f6deb00ab1cb9dfca8254d2 Mon Sep 17 00:00:00 2001 From: valer1435 Date: Wed, 2 Aug 2023 14:10:06 +0300 Subject: [PATCH 2/8] fix pos_label --- fedot/core/composer/metrics.py | 18 +++++++++++++++--- test/integration/models/test_model.py | 14 ++++++++++++++ test/unit/composer/test_quality_metrics.py | 2 -- test/unit/data/test_data.py | 2 -- .../optimizer/test_pipeline_objective_eval.py | 19 ++++++++++++++++++- 5 files changed, 47 insertions(+), 8 deletions(-) diff --git a/fedot/core/composer/metrics.py b/fedot/core/composer/metrics.py index 505c356be9..d3d9e42c3e 100644 --- a/fedot/core/composer/metrics.py +++ b/fedot/core/composer/metrics.py @@ -88,7 +88,7 @@ def get_value(cls, pipeline: 'Pipeline', reference_data: InputData, save_path=Path(save_path, 'forecast.png')) except Exception as ex: - pipeline.log.info(f'Metric can not be evaluated because of: {ex}') + pipeline.log.info(f'Metric can not be evaluated because of: {ex}', raise_if_test=True) return metric @@ -216,7 +216,10 @@ def metric(reference: InputData, predicted: OutputData) -> float: if n_classes > 2: additional_params = {'average': F1.multiclass_averaging_mode} else: - additional_params = {'average': F1.binary_averaging_mode} + u, count = np.unique(np.ravel(reference.target), return_counts=True) + count_sort_ind = np.argsort(count) + pos_label = u[count_sort_ind[0]].item() + additional_params = {'average': F1.binary_averaging_mode, 'pos_label': pos_label} return f1_score(y_true=reference.target, y_pred=predicted.predict, **additional_params) @@ -271,7 +274,16 @@ class Precision(QualityMetric): @staticmethod @from_maximised_metric def metric(reference: InputData, predicted: OutputData) -> float: - return precision_score(y_true=reference.target, y_pred=predicted.predict) + n_classes = reference.num_classes + if n_classes > 2: + return precision_score(y_true=reference.target, y_pred=predicted.predict) + else: + u, count = np.unique(np.ravel(reference.target), return_counts=True) + count_sort_ind = np.argsort(count) + pos_label = u[count_sort_ind[0]].item() + additional_params = {'pos_label': pos_label} + return precision_score(y_true=reference.target, y_pred=predicted.predict, + **additional_params) class Logloss(QualityMetric): diff --git a/test/integration/models/test_model.py b/test/integration/models/test_model.py index 5fdc72b52f..171acb23ad 100644 --- a/test/integration/models/test_model.py +++ b/test/integration/models/test_model.py @@ -98,6 +98,20 @@ def classification_dataset(): threshold = 0.5 classes = np.array([0.0 if val <= threshold else 1.0 for val in y]) classes = np.expand_dims(classes, axis=1) + data = InputData(features=MinMaxScaler().fit_transform(x), target=classes, idx=np.arange(0, len(x)), + task=Task(TaskTypesEnum.classification), + data_type=DataTypesEnum.table) + return data + + +def classification_dataset_with_str_labels(): + samples = 1000 + x = 10.0 * np.random.rand(samples, ) - 5.0 + x = np.expand_dims(x, axis=1) + y = 1.0 / (1.0 + np.exp(np.power(x, -1.0))) + threshold = 0.5 + classes = np.array(['a' if val <= threshold else 'b' for val in y]) + classes = np.expand_dims(classes, axis=1) data = InputData(features=MinMaxScaler().fit_transform(x), target=classes, idx=np.arange(0, len(x)), task=Task(TaskTypesEnum.classification), data_type=DataTypesEnum.table) diff --git a/test/unit/composer/test_quality_metrics.py b/test/unit/composer/test_quality_metrics.py index bb9e1786f6..426faef66d 100644 --- a/test/unit/composer/test_quality_metrics.py +++ b/test/unit/composer/test_quality_metrics.py @@ -22,8 +22,6 @@ @pytest.fixture() def data_setup(): predictors, response = load_breast_cancer(return_X_y=True) - np.random.shuffle(predictors) - np.random.shuffle(response) response = response[:100] predictors = predictors[:100] diff --git a/test/unit/data/test_data.py b/test/unit/data/test_data.py index 7d987dc3c9..a3eb55bfaa 100644 --- a/test/unit/data/test_data.py +++ b/test/unit/data/test_data.py @@ -18,8 +18,6 @@ @pytest.fixture() def data_setup() -> InputData: predictors, response = load_iris(return_X_y=True) - np.random.shuffle(predictors) - np.random.shuffle(response) predictors = predictors[:100] response = response[:100] data = InputData(features=predictors, target=response, idx=np.arange(0, 100), diff --git a/test/unit/optimizer/test_pipeline_objective_eval.py b/test/unit/optimizer/test_pipeline_objective_eval.py index cfb3f86444..657b7a201e 100644 --- a/test/unit/optimizer/test_pipeline_objective_eval.py +++ b/test/unit/optimizer/test_pipeline_objective_eval.py @@ -18,7 +18,7 @@ RegressionMetricsEnum from fedot.core.repository.tasks import Task, TaskTypesEnum from fedot.core.validation.split import tabular_cv_generator, OneFoldInputDataSplit -from test.integration.models.test_model import classification_dataset +from test.integration.models.test_model import classification_dataset, classification_dataset_with_str_labels from test.unit.tasks.test_forecasting import get_simple_ts_pipeline from test.unit.validation.test_table_cv import sample_pipeline from test.unit.validation.test_time_series_cv import configure_experiment @@ -86,6 +86,23 @@ def test_pipeline_objective_evaluate_with_different_metrics(classification_datas assert np.isclose(fitness.value, act_fitness.value, atol=1e-8), metric.name +@pytest.mark.parametrize( + 'pipeline', + [pipeline_first_test(), pipeline_second_test(), pipeline_third_test()] +) +def test_pipeline_objective_evaluate_with_different_metrics_with_str_labes(pipeline): + for metric in ClassificationMetricsEnum: + one_fold_split = OneFoldInputDataSplit() + data_split = partial(one_fold_split.input_split, input_data=classification_dataset_with_str_labels()) + check_pipeline = deepcopy(pipeline) + objective_eval = PipelineObjectiveEvaluate(MetricsObjective(metric), data_split) + fitness = objective_eval(pipeline) + act_fitness = actual_fitness(data_split, check_pipeline, metric) + assert fitness.valid + assert fitness.value is not None + assert np.isclose(fitness.value, act_fitness.value, atol=1e-8), metric.name + + def test_pipeline_objective_evaluate_with_empty_pipeline(classification_dataset): pipeline = empty_pipeline() From 8fcdae4199521e93adeb9439a2d28644493f48da Mon Sep 17 00:00:00 2001 From: valer1435 Date: Thu, 3 Aug 2023 17:54:44 +0300 Subject: [PATCH 3/8] fix resample --- .../core/pipelines/random_pipeline_factory.py | 3 ++- fedot/core/pipelines/verification.py | 5 +++-- fedot/core/pipelines/verification_rules.py | 22 +++++++++++++++++++ .../repository/data/model_repository.json | 6 ++++- test/unit/validation/test_table_cv.py | 2 ++ 5 files changed, 34 insertions(+), 4 deletions(-) diff --git a/fedot/core/pipelines/random_pipeline_factory.py b/fedot/core/pipelines/random_pipeline_factory.py index f964f0f54e..d48e305b53 100644 --- a/fedot/core/pipelines/random_pipeline_factory.py +++ b/fedot/core/pipelines/random_pipeline_factory.py @@ -1,3 +1,4 @@ +import random from copy import deepcopy from random import randint from typing import Optional @@ -76,7 +77,7 @@ def graph_growth(graph: OptGraph, for offspring_node in range(offspring_size): height = distance_to_root_level(graph, node_parent) is_max_depth_exceeded = height >= max_depth - 2 - is_primary_node_selected = height < max_depth - 1 and randint(0, 1) + is_primary_node_selected = height < max_depth - 1 and random.random() > 0.3 if is_max_depth_exceeded or is_primary_node_selected: primary_node = node_factory.get_node(is_primary=True) node_parent.nodes_from.append(primary_node) diff --git a/fedot/core/pipelines/verification.py b/fedot/core/pipelines/verification.py index 6e619ba38c..6a5ff7d5a7 100644 --- a/fedot/core/pipelines/verification.py +++ b/fedot/core/pipelines/verification.py @@ -26,7 +26,7 @@ has_no_data_flow_conflicts_in_ts_pipeline, has_primary_nodes, is_pipeline_contains_ts_operations, - only_non_lagged_operations_are_primary + only_non_lagged_operations_are_primary, has_correct_location_of_resample ) from fedot.core.repository.tasks import TaskTypesEnum @@ -40,7 +40,8 @@ has_no_conflicts_with_data_flow, has_no_conflicts_in_decompose, has_correct_data_connections, - has_correct_data_sources] + has_correct_data_sources, + has_correct_location_of_resample] ts_rules = [is_pipeline_contains_ts_operations, only_non_lagged_operations_are_primary, diff --git a/fedot/core/pipelines/verification_rules.py b/fedot/core/pipelines/verification_rules.py index eebede3594..8b5610580c 100644 --- a/fedot/core/pipelines/verification_rules.py +++ b/fedot/core/pipelines/verification_rules.py @@ -152,6 +152,28 @@ def has_no_data_flow_conflicts_in_ts_pipeline(pipeline: Pipeline): return True +def has_correct_location_of_resample(pipeline: Pipeline): + """ + Pipeline can have only one resample operation located in start of the pipeline + + :param pipeline: pipeline for checking + """ + is_resample_primary = False + is_not_resample_primary = False + for node in pipeline.nodes: + if not node.nodes_from: + if node.name == 'resample': + is_resample_primary = True + else: + is_not_resample_primary = True + if node.name == 'resample': + raise ValueError( + f'{ERROR_PREFIX} Pipeline can have only one resample operation located in start of the pipeline') + if is_resample_primary and is_not_resample_primary: + raise ValueError( + f'{ERROR_PREFIX} Pipeline can have only one resample operation located in start of the pipeline') + + def get_wrong_links(ts_to_table_operations: list, ts_data_operations: list, non_ts_data_operations: list, ts_models: list, non_ts_models: list) -> dict: """ diff --git a/fedot/core/repository/data/model_repository.json b/fedot/core/repository/data/model_repository.json index 47d1be30c6..0ef86535fa 100644 --- a/fedot/core/repository/data/model_repository.json +++ b/fedot/core/repository/data/model_repository.json @@ -165,7 +165,10 @@ "meta": "sklearn_class", "presets": ["fast_train"], "tags": [ - "bayesian", "non_multi", "linear" + "simple", + "bayesian", + "non_multi", + "linear" ] }, "catboost": { @@ -186,6 +189,7 @@ "meta": "sklearn_class", "presets": ["fast_train", "*tree"], "tags": [ + "simple", "tree", "interpretable", "non_linear" diff --git a/test/unit/validation/test_table_cv.py b/test/unit/validation/test_table_cv.py index fe67c0ab34..e63ee9c5f3 100644 --- a/test/unit/validation/test_table_cv.py +++ b/test/unit/validation/test_table_cv.py @@ -4,9 +4,11 @@ import pytest from golem.core.tuning.simultaneous import SimultaneousTuner +from sklearn.metrics import roc_auc_score as roc_auc from sklearn.model_selection import KFold, StratifiedKFold from fedot.api.main import Fedot +from fedot.core.composer.composer_builder import ComposerBuilder from fedot.core.data.data import InputData from fedot.core.data.data_split import train_test_data_setup from fedot.core.optimisers.objective import PipelineObjectiveEvaluate From 293c4532d77e88261c173d87763af13ba814ace2 Mon Sep 17 00:00:00 2001 From: valer1435 Date: Mon, 7 Aug 2023 11:59:52 +0300 Subject: [PATCH 4/8] fix resample --- fedot/api/api_utils/api_params_repository.py | 5 ++++- .../composer/gp_composer/specific_operators.py | 11 +++++++++++ fedot/core/pipelines/pipeline.py | 15 ++++++++++++++- fedot/core/pipelines/verification_rules.py | 8 +++++--- 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/fedot/api/api_utils/api_params_repository.py b/fedot/api/api_utils/api_params_repository.py index ed90d03d8b..9a6e76f4ef 100644 --- a/fedot/api/api_utils/api_params_repository.py +++ b/fedot/api/api_utils/api_params_repository.py @@ -5,7 +5,8 @@ from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum from golem.core.optimisers.genetic.operators.mutation import MutationTypesEnum -from fedot.core.composer.gp_composer.specific_operators import parameter_change_mutation, boosting_mutation +from fedot.core.composer.gp_composer.specific_operators import parameter_change_mutation, boosting_mutation, \ + add_resample_mutation from fedot.core.constants import AUTO_PRESET_NAME from fedot.core.repository.tasks import TaskTypesEnum from fedot.core.utils import default_fedot_data_dir @@ -131,5 +132,7 @@ def _get_default_mutations(task_type: TaskTypesEnum, params) -> Sequence[Mutatio # TODO remove workaround after boosting mutation fix if task_type == TaskTypesEnum.ts_forecasting: mutations.append(partial(boosting_mutation, params=params)) + else: + mutations.append(add_resample_mutation) return mutations diff --git a/fedot/core/composer/gp_composer/specific_operators.py b/fedot/core/composer/gp_composer/specific_operators.py index 439bd89fad..2e53c8ae50 100644 --- a/fedot/core/composer/gp_composer/specific_operators.py +++ b/fedot/core/composer/gp_composer/specific_operators.py @@ -90,6 +90,17 @@ def boosting_mutation(pipeline: Pipeline, requirements, graph_gen_params, **kwar return pipeline +def add_resample_mutation(pipeline: Pipeline, **kwargs): + resample_node = PipelineNode('resample') + + p_nodes = [p_node for p_node in pipeline.primary_nodes] + pipeline.add_node(resample_node) + + for node in p_nodes: + pipeline.connect_nodes(resample_node, node) + return pipeline + + def choose_new_model(boosting_model_candidates: List[str]) -> str: """ Since 'linear' and 'dtreg' operations are suitable for solving the problem and they are simpler than others, they are preferred """ diff --git a/fedot/core/pipelines/pipeline.py b/fedot/core/pipelines/pipeline.py index 220984b206..3778f27934 100644 --- a/fedot/core/pipelines/pipeline.py +++ b/fedot/core/pipelines/pipeline.py @@ -1,7 +1,7 @@ from copy import deepcopy from datetime import timedelta from os import PathLike -from typing import Optional, Tuple, Union, Sequence, Dict +from typing import Optional, Tuple, Union, Sequence, List, Dict import func_timeout from golem.core.dag.graph import Graph @@ -327,6 +327,19 @@ def root_node(self) -> Optional[PipelineNode]: raise ValueError(f'{ERROR_PREFIX} More than 1 root_nodes in pipeline') return root[0] + @property + def primary_nodes(self) -> List[PipelineNode]: + """Finds pipelines sink-node + + Returns: + the final predictor-node + """ + if not self.nodes: + return [] + primary_nodes = [node for node in self.nodes + if not node.nodes_from] + return primary_nodes + def pipeline_for_side_task(self, task_type: TaskTypesEnum) -> 'Pipeline': """Returns pipeline formed from the last node solving the given problem and all its parents diff --git a/fedot/core/pipelines/verification_rules.py b/fedot/core/pipelines/verification_rules.py index 8b5610580c..fbc1c13ea8 100644 --- a/fedot/core/pipelines/verification_rules.py +++ b/fedot/core/pipelines/verification_rules.py @@ -166,12 +166,14 @@ def has_correct_location_of_resample(pipeline: Pipeline): is_resample_primary = True else: is_not_resample_primary = True - if node.name == 'resample': - raise ValueError( - f'{ERROR_PREFIX} Pipeline can have only one resample operation located in start of the pipeline') + else: + if node.name == 'resample': + raise ValueError( + f'{ERROR_PREFIX} Pipeline can have only one resample operation located in start of the pipeline') if is_resample_primary and is_not_resample_primary: raise ValueError( f'{ERROR_PREFIX} Pipeline can have only one resample operation located in start of the pipeline') + return True def get_wrong_links(ts_to_table_operations: list, ts_data_operations: list, non_ts_data_operations: list, From b9be1d8eb9ee0cdd91bcbebaaf25e683fc125024 Mon Sep 17 00:00:00 2001 From: valer1435 Date: Mon, 7 Aug 2023 18:04:22 +0300 Subject: [PATCH 5/8] fix unexpected inplace operations --- fedot/core/operations/operation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fedot/core/operations/operation.py b/fedot/core/operations/operation.py index 23314407e1..deed03a172 100644 --- a/fedot/core/operations/operation.py +++ b/fedot/core/operations/operation.py @@ -1,4 +1,5 @@ from abc import abstractmethod +from copy import deepcopy from typing import Optional, Union, Dict, Any from golem.core.log import default_log @@ -80,6 +81,7 @@ def fit(self, params: Optional[Union[OperationParameters, dict]], data: InputDat Returns: tuple: trained operation and prediction on train data """ + data = deepcopy(data) self._init(data.task, params=params, n_samples_data=data.features.shape[0]) self.fitted_operation = self._eval_strategy.fit(train_data=data) @@ -119,6 +121,7 @@ def predict_for_fit(self, fitted_operation, data: InputData, params: Optional[Op def _predict(self, fitted_operation, data: InputData, params: Optional[OperationParameters] = None, output_mode: str = 'default', is_fit_stage: bool = False): + data = deepcopy(data) is_main_target = data.supplementary_data.is_main_target data_flow_length = data.supplementary_data.data_flow_length self._init(data.task, output_mode=output_mode, params=params, n_samples_data=data.features.shape[0]) From 4ae55dbd4ce99c8d59fa4e993053aa0a1116e096 Mon Sep 17 00:00:00 2001 From: valer1435 Date: Wed, 9 Aug 2023 17:44:54 +0300 Subject: [PATCH 6/8] review fixes --- fedot/core/composer/gp_composer/specific_operators.py | 7 +++++++ fedot/core/pipelines/pipeline.py | 8 ++++---- fedot/core/pipelines/random_pipeline_factory.py | 6 ++++-- fedot/core/pipelines/verification_rules.py | 2 +- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/fedot/core/composer/gp_composer/specific_operators.py b/fedot/core/composer/gp_composer/specific_operators.py index 2e53c8ae50..7799fe432f 100644 --- a/fedot/core/composer/gp_composer/specific_operators.py +++ b/fedot/core/composer/gp_composer/specific_operators.py @@ -91,6 +91,13 @@ def boosting_mutation(pipeline: Pipeline, requirements, graph_gen_params, **kwar def add_resample_mutation(pipeline: Pipeline, **kwargs): + """ + Add resample operation before all primary operations in pipeline + + :param pipeline: pipeline to insert resample + + :return: mutated pipeline + """ resample_node = PipelineNode('resample') p_nodes = [p_node for p_node in pipeline.primary_nodes] diff --git a/fedot/core/pipelines/pipeline.py b/fedot/core/pipelines/pipeline.py index 3778f27934..a9a608c2c9 100644 --- a/fedot/core/pipelines/pipeline.py +++ b/fedot/core/pipelines/pipeline.py @@ -307,7 +307,7 @@ def load(self, source: Union[str, dict], dict_fitted_operations: Optional[dict] dict_fitted_operations: dictionary of the fitted operations """ - self.nodes = [] + self.nodes: Optional[List[PipelineNode]] = [] template = PipelineTemplate(self) template.import_pipeline(source, dict_fitted_operations) return self @@ -329,15 +329,15 @@ def root_node(self) -> Optional[PipelineNode]: @property def primary_nodes(self) -> List[PipelineNode]: - """Finds pipelines sink-node + """Finds pipeline's primary nodes Returns: - the final predictor-node + list of primary nodes """ if not self.nodes: return [] primary_nodes = [node for node in self.nodes - if not node.nodes_from] + if node.is_primary] return primary_nodes def pipeline_for_side_task(self, task_type: TaskTypesEnum) -> 'Pipeline': diff --git a/fedot/core/pipelines/random_pipeline_factory.py b/fedot/core/pipelines/random_pipeline_factory.py index d48e305b53..d08f180c58 100644 --- a/fedot/core/pipelines/random_pipeline_factory.py +++ b/fedot/core/pipelines/random_pipeline_factory.py @@ -15,6 +15,7 @@ class RandomPipelineFactory(RandomGraphFactory): """ Default realisation of random graph factory. Generates DAG graph using random growth. """ + PROBABILITY_OF_GROWTH = 0.3 def __init__(self, verifier: GraphVerifier, @@ -77,8 +78,9 @@ def graph_growth(graph: OptGraph, for offspring_node in range(offspring_size): height = distance_to_root_level(graph, node_parent) is_max_depth_exceeded = height >= max_depth - 2 - is_primary_node_selected = height < max_depth - 1 and random.random() > 0.3 - if is_max_depth_exceeded or is_primary_node_selected: + is_primary_node_selected = height < max_depth - 1 + is_growth_should_stopped = random.random() > RandomPipelineFactory.PROBABILITY_OF_GROWTH + if is_max_depth_exceeded or is_primary_node_selected or is_growth_should_stopped: primary_node = node_factory.get_node(is_primary=True) node_parent.nodes_from.append(primary_node) graph.add_node(primary_node) diff --git a/fedot/core/pipelines/verification_rules.py b/fedot/core/pipelines/verification_rules.py index fbc1c13ea8..7c40d53280 100644 --- a/fedot/core/pipelines/verification_rules.py +++ b/fedot/core/pipelines/verification_rules.py @@ -161,7 +161,7 @@ def has_correct_location_of_resample(pipeline: Pipeline): is_resample_primary = False is_not_resample_primary = False for node in pipeline.nodes: - if not node.nodes_from: + if node.is_primary: if node.name == 'resample': is_resample_primary = True else: From 1d4959079afdc89bc0cca418be09735db24809b1 Mon Sep 17 00:00:00 2001 From: valer1435 Date: Mon, 14 Aug 2023 11:17:05 +0300 Subject: [PATCH 7/8] remove deepcopy --- fedot/core/operations/operation.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fedot/core/operations/operation.py b/fedot/core/operations/operation.py index deed03a172..1b34ebacde 100644 --- a/fedot/core/operations/operation.py +++ b/fedot/core/operations/operation.py @@ -1,5 +1,4 @@ from abc import abstractmethod -from copy import deepcopy from typing import Optional, Union, Dict, Any from golem.core.log import default_log @@ -81,7 +80,6 @@ def fit(self, params: Optional[Union[OperationParameters, dict]], data: InputDat Returns: tuple: trained operation and prediction on train data """ - data = deepcopy(data) self._init(data.task, params=params, n_samples_data=data.features.shape[0]) self.fitted_operation = self._eval_strategy.fit(train_data=data) @@ -121,7 +119,7 @@ def predict_for_fit(self, fitted_operation, data: InputData, params: Optional[Op def _predict(self, fitted_operation, data: InputData, params: Optional[OperationParameters] = None, output_mode: str = 'default', is_fit_stage: bool = False): - data = deepcopy(data) + is_main_target = data.supplementary_data.is_main_target data_flow_length = data.supplementary_data.data_flow_length self._init(data.task, output_mode=output_mode, params=params, n_samples_data=data.features.shape[0]) @@ -136,11 +134,11 @@ def _predict(self, fitted_operation, data: InputData, params: Optional[Operation predict_data=data) prediction = self.assign_tabular_column_types(prediction, output_mode) + # any inplace operations here are dangerous! if is_main_target is False: prediction.supplementary_data.is_main_target = is_main_target prediction.supplementary_data.data_flow_length = data_flow_length - prediction.supplementary_data.obligatorily_preprocessed = True return prediction @staticmethod From b7af0ed7c296c4e2621b7c5e83c179ec2e429172 Mon Sep 17 00:00:00 2001 From: valer1435 Date: Mon, 14 Aug 2023 12:08:17 +0300 Subject: [PATCH 8/8] pep8 --- test/unit/validation/test_table_cv.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/unit/validation/test_table_cv.py b/test/unit/validation/test_table_cv.py index e63ee9c5f3..fe67c0ab34 100644 --- a/test/unit/validation/test_table_cv.py +++ b/test/unit/validation/test_table_cv.py @@ -4,11 +4,9 @@ import pytest from golem.core.tuning.simultaneous import SimultaneousTuner -from sklearn.metrics import roc_auc_score as roc_auc from sklearn.model_selection import KFold, StratifiedKFold from fedot.api.main import Fedot -from fedot.core.composer.composer_builder import ComposerBuilder from fedot.core.data.data import InputData from fedot.core.data.data_split import train_test_data_setup from fedot.core.optimisers.objective import PipelineObjectiveEvaluate