1110 integration tests fix (#1112)

* Many changes for integration tests
* Random seed context improved
* TPOT-based tests removed

---------

Co-authored-by: nicl-nno <[email protected]>
aimclub · Aug 11, 2023 · 89ff552 · 89ff552
1 parent 4200b4e
commit 89ff552
Show file tree

Hide file tree

Showing 79 changed files with 496 additions and 429 deletions.
diff --git a/cases/credit_scoring/credit_scoring_problem.py b/cases/credit_scoring/credit_scoring_problem.py
@@ -1,19 +1,12 @@
 import logging
-import os
-import random
-from pathlib import Path
 
-import numpy as np
 from sklearn.metrics import roc_auc_score as roc_auc
 
 from fedot.api.main import Fedot
-from fedot.core.constants import BEST_QUALITY_PRESET_NAME
 from fedot.core.data.data import InputData
 from fedot.core.pipelines.pipeline import Pipeline
 from fedot.core.utils import fedot_project_root
-
-random.seed(1)
-np.random.seed(1)
+from fedot.core.utils import set_random_seed
 
 
 def calculate_validation_metric(pipeline: Pipeline, dataset_to_validate: InputData) -> float:
@@ -32,16 +25,15 @@ def run_credit_scoring_problem(train_file_path, test_file_path,
                                **composer_args):
     automl = Fedot(problem='classification',
                    timeout=timeout,
-                   preset=BEST_QUALITY_PRESET_NAME,
-                   logging_level=logging.DEBUG,
+                   preset='fast_train',
+                   logging_level=logging.FATAL,
                    **composer_args)
     automl.fit(train_file_path, target=target)
     automl.predict(test_file_path)
     metrics = automl.get_metrics()
 
-    if automl.history:
-        lb = automl.history.get_leaderboard()
-        Path(os.path.join('D:/', "leaderboard.csv")).write_text(lb)
+    if automl.history and automl.history.generations:
+        print(automl.history.get_leaderboard())
 
     if visualization:
         automl.current_pipeline.show()
@@ -57,16 +49,18 @@ def get_scoring_data():
     # a dataset that will be used as a train and test set during composition
 
     file_path_train = 'cases/data/scoring/scoring_train.csv'
-    full_path_train = os.path.join(str(fedot_project_root()), file_path_train)
+    full_path_train = fedot_project_root().joinpath(file_path_train)
 
     # a dataset for a final validation of the composed model
     file_path_test = 'cases/data/scoring/scoring_test.csv'
-    full_path_test = os.path.join(str(fedot_project_root()), file_path_test)
+    full_path_test = fedot_project_root().joinpath(file_path_test)
 
     return full_path_train, full_path_test
 
 
 if __name__ == '__main__':
+    set_random_seed(42)
+
     full_path_train, full_path_test = get_scoring_data()
     run_credit_scoring_problem(full_path_train,
                                full_path_test,

diff --git a/cases/credit_scoring/credit_scoring_problem_multiobj.py b/cases/credit_scoring/credit_scoring_problem_multiobj.py
@@ -1,13 +1,9 @@
 import datetime
-import random
 
-import numpy as np
 from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters
 from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum
 from golem.core.optimisers.genetic.operators.selection import SelectionTypesEnum
 from golem.core.tuning.sequential import SequentialTuner
-
-from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements
 from golem.visualisation.opt_viz_extra import OptHistoryExtraVisualizer
 from sklearn.metrics import roc_auc_score as roc_auc
 
@@ -16,13 +12,12 @@
 from fedot.core.data.data import InputData
 from fedot.core.pipelines.node import PipelineNode
 from fedot.core.pipelines.pipeline import Pipeline
+from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements
 from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
 from fedot.core.repository.operation_types_repository import get_operations_for_task
 from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum, ComplexityMetricsEnum
 from fedot.core.repository.tasks import Task, TaskTypesEnum
-
-random.seed(12)
-np.random.seed(12)
+from fedot.core.utils import set_random_seed
 
 
 def results_visualization(history, composed_pipelines):
@@ -55,7 +50,7 @@ def run_credit_scoring_problem(train_file_path, test_file_path,
 
     # the choice of the metric for the pipeline quality assessment during composition
     quality_metric = ClassificationMetricsEnum.ROCAUC
-    complexity_metric = ComplexityMetricsEnum.node_num
+    complexity_metric = ComplexityMetricsEnum.node_number
     metrics = [quality_metric, complexity_metric]
     # the choice and initialisation of the GP search
     composer_requirements = PipelineComposerRequirements(
@@ -70,11 +65,13 @@ def run_credit_scoring_problem(train_file_path, test_file_path,
     )
 
     # Create composer and with required composer params
-    composer = ComposerBuilder(task=task). \
-        with_optimizer_params(params). \
-        with_requirements(composer_requirements). \
-        with_metrics(metrics). \
-        build()
+    composer = (
+        ComposerBuilder(task=task)
+        .with_optimizer_params(params)
+        .with_requirements(composer_requirements)
+        .with_metrics(metrics)
+        .build()
+    )
 
     # the optimal pipeline generation by composition - the most time-consuming task
     pipelines_evo_composed = composer.compose_pipeline(data=dataset_to_compose)
@@ -88,11 +85,13 @@ def run_credit_scoring_problem(train_file_path, test_file_path,
 
     for pipeline_num, pipeline_evo_composed in enumerate(pipelines_evo_composed):
 
-        tuner = TunerBuilder(task)\
-            .with_tuner(SequentialTuner)\
-            .with_iterations(50)\
-            .with_metric(metrics[0])\
+        tuner = (
+            TunerBuilder(task)
+            .with_tuner(SequentialTuner)
+            .with_iterations(50)
+            .with_metric(metrics[0])
             .build(dataset_to_compose)
+        )
         nodes = pipeline_evo_composed.nodes
         for node_index, node in enumerate(nodes):
             if isinstance(node, PipelineNode) and node.is_primary:
@@ -115,5 +114,7 @@ def run_credit_scoring_problem(train_file_path, test_file_path,
 
 
 if __name__ == '__main__':
+    set_random_seed(12)
+
     full_path_train, full_path_test = get_scoring_data()
     run_credit_scoring_problem(full_path_train, full_path_test, visualization=True)
diff --git a/cases/metocean_forecasting_problem.py b/cases/metocean_forecasting_problem.py
@@ -44,7 +44,7 @@ def run_metocean_forecasting_problem(train_file_path, test_file_path,
 
     fedot = Fedot(problem='ts_forecasting',
                   task_params=TsForecastingParams(forecast_length=forecast_length),
-                  timeout=timeout, logging_level=logging.DEBUG)
+                  timeout=timeout, logging_level=logging.FATAL)
 
     pipeline = fedot.fit(features=historical_data, target=ssh_history)
     fedot.forecast(historical_data)

diff --git a/docs/source/advanced/cli_call.rst b/docs/source/advanced/cli_call.rst
@@ -56,7 +56,6 @@ The result of execution is presented below:
                         Composer parameter: model names to use
   --tuning TUNING       Composer parameter: 1 - with tuning, 0 - without tuning
   --cv_folds CV_FOLDS   Composer parameter: Number of folds for cross-validation
-  --val_bl VAL_BL       Composer parameter: Number of validation blocks for time series forecasting
   --hist_path HIST_PATH
                         Composer parameter: Name of the folder for composing history
   --for_len FOR_LEN     Time Series Forecasting parameter: forecast length

diff --git a/examples/advanced/additional_learning.py b/examples/advanced/additional_learning.py
@@ -9,6 +9,7 @@
 from fedot.core.pipelines.pipeline import Pipeline
 from fedot.core.pipelines.pipeline_builder import PipelineBuilder
 from fedot.core.utils import fedot_project_root
+from fedot.core.utils import set_random_seed
 
 
 def run_additional_learning_example():
@@ -22,7 +23,7 @@ def run_additional_learning_example():
 
     problem = 'classification'
 
-    auto_model = Fedot(problem=problem, seed=42, timeout=5, preset='best_quality',
+    auto_model = Fedot(problem=problem, timeout=5, preset='best_quality',
                        initial_assumption=PipelineBuilder().add_node('scaling').add_node('logit').build())
 
     auto_model.fit(features=deepcopy(train_data.head(1000)), target='target')
@@ -40,16 +41,16 @@ def run_additional_learning_example():
     train_data = train_data.head(5000)
     timeout = 1
 
-    auto_model_from_atomized = Fedot(problem=problem, seed=42, preset='best_quality', timeout=timeout,
-                                     logging_level=logging.INFO,
+    auto_model_from_atomized = Fedot(problem=problem, preset='best_quality', timeout=timeout,
+                                     logging_level=logging.FATAL,
                                      initial_assumption=atomized_model)
     auto_model_from_atomized.fit(features=deepcopy(train_data), target='target')
     auto_model_from_atomized.predict_proba(features=deepcopy(test_data))
     auto_model_from_atomized.current_pipeline.show()
     print('auto_model_from_atomized', auto_model_from_atomized.get_metrics(deepcopy(test_data_target)))
 
-    auto_model_from_pipeline = Fedot(problem=problem, seed=42, preset='best_quality', timeout=timeout,
-                                     logging_level=logging.INFO,
+    auto_model_from_pipeline = Fedot(problem=problem, preset='best_quality', timeout=timeout,
+                                     logging_level=logging.FATAL,
                                      initial_assumption=non_atomized_model)
     auto_model_from_pipeline.fit(features=deepcopy(train_data), target='target')
     auto_model_from_pipeline.predict_proba(features=deepcopy(test_data))
@@ -58,4 +59,6 @@ def run_additional_learning_example():
 
 
 if __name__ == '__main__':
+    set_random_seed(42)
+
     run_additional_learning_example()
diff --git a/examples/advanced/decompose/classification_refinement_example.py b/examples/advanced/decompose/classification_refinement_example.py
@@ -1,6 +1,3 @@
-import random
-
-import numpy as np
 from golem.core.tuning.simultaneous import SimultaneousTuner
 
 from cases.credit_scoring.credit_scoring_problem import get_scoring_data, calculate_validation_metric
@@ -10,9 +7,7 @@
 from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
 from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum
 from fedot.core.repository.tasks import Task, TaskTypesEnum
-
-random.seed(1)
-np.random.seed(1)
+from fedot.core.utils import set_random_seed
 
 
 def get_refinement_pipeline():
@@ -66,11 +61,13 @@ def run_refinement_scoring_example(train_path, test_path, with_tuning=False):
     display_roc_auc(decompose_c, test_dataset, 'With decomposition pipeline')
 
     if with_tuning:
-        tuner = TunerBuilder(task) \
-            .with_tuner(SimultaneousTuner)\
-            .with_metric(ClassificationMetricsEnum.ROCAUC)\
-            .with_iterations(30) \
+        tuner = (
+            TunerBuilder(task)
+            .with_tuner(SimultaneousTuner)
+            .with_metric(ClassificationMetricsEnum.ROCAUC)
+            .with_iterations(30)
             .build(train_dataset)
+        )
         no_decompose_c = tuner.tune(no_decompose_c)
         decompose_c = tuner.tune(decompose_c)
 
@@ -82,5 +79,7 @@ def run_refinement_scoring_example(train_path, test_path, with_tuning=False):
 
 
 if __name__ == '__main__':
+    set_random_seed(1)
+
     full_path_train, full_path_test = get_scoring_data()
     run_refinement_scoring_example(full_path_train, full_path_test, with_tuning=True)
diff --git a/examples/advanced/decompose/refinement_forecast_example.py b/examples/advanced/decompose/refinement_forecast_example.py
@@ -12,9 +12,10 @@
 from fedot.core.pipelines.ts_wrappers import in_sample_ts_forecast
 from fedot.core.repository.dataset_types import DataTypesEnum
 from fedot.core.repository.tasks import TaskTypesEnum, Task, TsForecastingParams
+from fedot.core.utils import set_random_seed
+
 
 warnings.filterwarnings('ignore')
-np.random.seed(2020)
 
 
 def get_refinement_pipeline_with_polyfit():
@@ -160,6 +161,8 @@ def run_refinement_forecast(path_to_file, len_forecast=100, lagged=150,
 
 
 if __name__ == '__main__':
+    set_random_seed(2020)
+
     path = '../../../cases/data/time_series/economic_data.csv'
     run_refinement_forecast(path, len_forecast=50, validation_blocks=5,
                             lagged=50, vis_with_decompose=True)
diff --git a/examples/advanced/fedot_based_solutions/external_optimizer.py b/examples/advanced/fedot_based_solutions/external_optimizer.py
@@ -13,7 +13,7 @@ def run_with_random_search_composer():
     composer_params = {'available_operations': ['class_decompose', 'rf', 'linear', 'xgboost', 'dt'],
                        'optimizer': RandomMutationSearchOptimizer}
 
-    automl = Fedot(problem='classification', timeout=1, logging_level=logging.DEBUG,
+    automl = Fedot(problem='classification', timeout=1, logging_level=logging.FATAL,
                    preset='fast_train', **composer_params)
 
     automl.fit(train_data_path)

diff --git a/examples/advanced/multimodal_text_num_example.py b/examples/advanced/multimodal_text_num_example.py
@@ -1,12 +1,12 @@
-from pathlib import Path
-
 from fedot.api.main import Fedot
 from fedot.core.data.data_split import train_test_data_setup
 from fedot.core.data.multi_modal import MultiModalData
 from fedot.core.utils import fedot_project_root
+from fedot.core.utils import set_random_seed
 
 
-def run_multi_modal_example(file_path: str, visualization=False, with_tuning=True) -> float:
+def run_multi_modal_example(file_path: str, visualization: bool = False, with_tuning: bool = True,
+                            timeout: float = 10.) -> float:
     """
     Runs FEDOT on multimodal data from the `Wine Reviews dataset
     <https://www.kaggle.com/datasets/zynicide/wine-reviews>`_.
@@ -18,16 +18,17 @@ def run_multi_modal_example(file_path: str, visualization=False, with_tuning=Tru
         file_path: path to the file with multimodal data.
         visualization: if True, then final pipeline will be visualised.
         with_tuning: if True, then pipeline will be tuned.
+        timeout: overall fitting duration
 
     Returns:
         F1 metrics of the model.
     """
     task = 'classification'
-    path = Path(fedot_project_root(), file_path)
+    path = fedot_project_root().joinpath(file_path)
     data = MultiModalData.from_csv(file_path=path, task=task, target_columns='variety', index_col=None)
     fit_data, predict_data = train_test_data_setup(data, shuffle_flag=True, split_ratio=0.7)
 
-    automl_model = Fedot(problem=task, timeout=10, with_tuning=with_tuning)
+    automl_model = Fedot(problem=task, timeout=timeout, with_tuning=with_tuning, n_jobs=1)
     automl_model.fit(features=fit_data,
                      target=fit_data.target)
 
@@ -39,8 +40,10 @@ def run_multi_modal_example(file_path: str, visualization=False, with_tuning=Tru
 
     print(f'F1 for validation sample is {round(metrics["f1"], 3)}')
 
-    return metrics["f1"]
+    return metrics['f1']
 
 
 if __name__ == '__main__':
+    set_random_seed(42)
+
     run_multi_modal_example(file_path='examples/data/multimodal_wine.csv', visualization=True)
diff --git a/examples/advanced/multiobj_optimisation.py b/examples/advanced/multiobj_optimisation.py
@@ -2,6 +2,7 @@
 
 from fedot.api.main import Fedot
 from fedot.core.utils import fedot_project_root
+from fedot.core.utils import set_random_seed
 
 
 def run_classification_multiobj_example(visualization=False, timeout=1, with_tuning=True):
@@ -11,8 +12,8 @@ def run_classification_multiobj_example(visualization=False, timeout=1, with_tun
     del test_data['class']
     problem = 'classification'
 
-    metric_names = ['f1', 'node_num']
-    auto_model = Fedot(problem=problem, timeout=timeout, preset='best_quality', seed=42,
+    metric_names = ['f1', 'node_number']
+    auto_model = Fedot(problem=problem, timeout=timeout, preset='best_quality',
                        metric=metric_names,
                        with_tuning=with_tuning)
     auto_model.fit(features=train_data, target='class')
@@ -27,4 +28,6 @@ def run_classification_multiobj_example(visualization=False, timeout=1, with_tun
 
 
 if __name__ == '__main__':
+    set_random_seed(42)
+
     run_classification_multiobj_example(visualization=True)
diff --git a/examples/advanced/parallelization_comparison.py b/examples/advanced/parallelization_comparison.py
@@ -46,7 +46,7 @@ def run_experiments(timeout: float = None, partitions_n=10, n_jobs=-1):
             train_data_tmp = train_data.iloc[:partition].copy()
             start_time = timeit.default_timer()
             auto_model = Fedot(problem=problem, seed=42, timeout=timeout,
-                               n_jobs=_n_jobs, logging_level=logging.NOTSET,
+                               n_jobs=_n_jobs, logging_level=logging.FATAL,
                                with_tuning=False, preset='fast_train')
             auto_model.fit(features=train_data_tmp, target='target')
             times[_n_jobs].append((timeit.default_timer() - start_time) / 60)

diff --git a/examples/advanced/profiler_example.py b/examples/advanced/profiler_example.py
@@ -1,16 +1,14 @@
 import os
-import random
 
-import numpy as np
 from golem.utilities.profiler.memory_profiler import MemoryProfiler
 from golem.utilities.profiler.time_profiler import TimeProfiler
 
 from cases.credit_scoring.credit_scoring_problem import run_credit_scoring_problem, get_scoring_data
+from fedot.core.utils import set_random_seed
 
-random.seed(1)
-np.random.seed(1)
 
 if __name__ == '__main__':
+    set_random_seed(1)
     # JUST UNCOMMENT WHAT TYPE OF PROFILER DO YOU NEED
     # EXAMPLE of MemoryProfiler.