Skip to content

Commit

Permalink
1110 integration tests fix (#1112)
Browse files Browse the repository at this point in the history
* Many changes for integration tests

* Random seed context improved

* TPOT-based tests removed

---------

Co-authored-by: nicl-nno <[email protected]>
  • Loading branch information
IIaKyJIuH and nicl-nno authored Aug 11, 2023
1 parent 4200b4e commit 89ff552
Show file tree
Hide file tree
Showing 79 changed files with 496 additions and 429 deletions.
24 changes: 9 additions & 15 deletions cases/credit_scoring/credit_scoring_problem.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,12 @@
import logging
import os
import random
from pathlib import Path

import numpy as np
from sklearn.metrics import roc_auc_score as roc_auc

from fedot.api.main import Fedot
from fedot.core.constants import BEST_QUALITY_PRESET_NAME
from fedot.core.data.data import InputData
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.utils import fedot_project_root

random.seed(1)
np.random.seed(1)
from fedot.core.utils import set_random_seed


def calculate_validation_metric(pipeline: Pipeline, dataset_to_validate: InputData) -> float:
Expand All @@ -32,16 +25,15 @@ def run_credit_scoring_problem(train_file_path, test_file_path,
**composer_args):
automl = Fedot(problem='classification',
timeout=timeout,
preset=BEST_QUALITY_PRESET_NAME,
logging_level=logging.DEBUG,
preset='fast_train',
logging_level=logging.FATAL,
**composer_args)
automl.fit(train_file_path, target=target)
automl.predict(test_file_path)
metrics = automl.get_metrics()

if automl.history:
lb = automl.history.get_leaderboard()
Path(os.path.join('D:/', "leaderboard.csv")).write_text(lb)
if automl.history and automl.history.generations:
print(automl.history.get_leaderboard())

if visualization:
automl.current_pipeline.show()
Expand All @@ -57,16 +49,18 @@ def get_scoring_data():
# a dataset that will be used as a train and test set during composition

file_path_train = 'cases/data/scoring/scoring_train.csv'
full_path_train = os.path.join(str(fedot_project_root()), file_path_train)
full_path_train = fedot_project_root().joinpath(file_path_train)

# a dataset for a final validation of the composed model
file_path_test = 'cases/data/scoring/scoring_test.csv'
full_path_test = os.path.join(str(fedot_project_root()), file_path_test)
full_path_test = fedot_project_root().joinpath(file_path_test)

return full_path_train, full_path_test


if __name__ == '__main__':
set_random_seed(42)

full_path_train, full_path_test = get_scoring_data()
run_credit_scoring_problem(full_path_train,
full_path_test,
Expand Down
35 changes: 18 additions & 17 deletions cases/credit_scoring/credit_scoring_problem_multiobj.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
import datetime
import random

import numpy as np
from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters
from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum
from golem.core.optimisers.genetic.operators.selection import SelectionTypesEnum
from golem.core.tuning.sequential import SequentialTuner

from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements
from golem.visualisation.opt_viz_extra import OptHistoryExtraVisualizer
from sklearn.metrics import roc_auc_score as roc_auc

Expand All @@ -16,13 +12,12 @@
from fedot.core.data.data import InputData
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.repository.operation_types_repository import get_operations_for_task
from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum, ComplexityMetricsEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum

random.seed(12)
np.random.seed(12)
from fedot.core.utils import set_random_seed


def results_visualization(history, composed_pipelines):
Expand Down Expand Up @@ -55,7 +50,7 @@ def run_credit_scoring_problem(train_file_path, test_file_path,

# the choice of the metric for the pipeline quality assessment during composition
quality_metric = ClassificationMetricsEnum.ROCAUC
complexity_metric = ComplexityMetricsEnum.node_num
complexity_metric = ComplexityMetricsEnum.node_number
metrics = [quality_metric, complexity_metric]
# the choice and initialisation of the GP search
composer_requirements = PipelineComposerRequirements(
Expand All @@ -70,11 +65,13 @@ def run_credit_scoring_problem(train_file_path, test_file_path,
)

# Create the composer with the required optimizer params, requirements and metrics
composer = ComposerBuilder(task=task). \
with_optimizer_params(params). \
with_requirements(composer_requirements). \
with_metrics(metrics). \
build()
composer = (
ComposerBuilder(task=task)
.with_optimizer_params(params)
.with_requirements(composer_requirements)
.with_metrics(metrics)
.build()
)

# the optimal pipeline generation by composition - the most time-consuming task
pipelines_evo_composed = composer.compose_pipeline(data=dataset_to_compose)
Expand All @@ -88,11 +85,13 @@ def run_credit_scoring_problem(train_file_path, test_file_path,

for pipeline_num, pipeline_evo_composed in enumerate(pipelines_evo_composed):

tuner = TunerBuilder(task)\
.with_tuner(SequentialTuner)\
.with_iterations(50)\
.with_metric(metrics[0])\
tuner = (
TunerBuilder(task)
.with_tuner(SequentialTuner)
.with_iterations(50)
.with_metric(metrics[0])
.build(dataset_to_compose)
)
nodes = pipeline_evo_composed.nodes
for node_index, node in enumerate(nodes):
if isinstance(node, PipelineNode) and node.is_primary:
Expand All @@ -115,5 +114,7 @@ def run_credit_scoring_problem(train_file_path, test_file_path,


if __name__ == '__main__':
set_random_seed(12)

full_path_train, full_path_test = get_scoring_data()
run_credit_scoring_problem(full_path_train, full_path_test, visualization=True)
2 changes: 1 addition & 1 deletion cases/metocean_forecasting_problem.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def run_metocean_forecasting_problem(train_file_path, test_file_path,

fedot = Fedot(problem='ts_forecasting',
task_params=TsForecastingParams(forecast_length=forecast_length),
timeout=timeout, logging_level=logging.DEBUG)
timeout=timeout, logging_level=logging.FATAL)

pipeline = fedot.fit(features=historical_data, target=ssh_history)
fedot.forecast(historical_data)
Expand Down
1 change: 0 additions & 1 deletion docs/source/advanced/cli_call.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ The result of execution is presented below:
Composer parameter: model names to use
--tuning TUNING Composer parameter: 1 - with tuning, 0 - without tuning
--cv_folds CV_FOLDS Composer parameter: Number of folds for cross-validation
--val_bl VAL_BL Composer parameter: Number of validation blocks for time series forecasting
--hist_path HIST_PATH
Composer parameter: Name of the folder for composing history
--for_len FOR_LEN Time Series Forecasting parameter: forecast length
Expand Down
13 changes: 8 additions & 5 deletions examples/advanced/additional_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.pipelines.pipeline_builder import PipelineBuilder
from fedot.core.utils import fedot_project_root
from fedot.core.utils import set_random_seed


def run_additional_learning_example():
Expand All @@ -22,7 +23,7 @@ def run_additional_learning_example():

problem = 'classification'

auto_model = Fedot(problem=problem, seed=42, timeout=5, preset='best_quality',
auto_model = Fedot(problem=problem, timeout=5, preset='best_quality',
initial_assumption=PipelineBuilder().add_node('scaling').add_node('logit').build())

auto_model.fit(features=deepcopy(train_data.head(1000)), target='target')
Expand All @@ -40,16 +41,16 @@ def run_additional_learning_example():
train_data = train_data.head(5000)
timeout = 1

auto_model_from_atomized = Fedot(problem=problem, seed=42, preset='best_quality', timeout=timeout,
logging_level=logging.INFO,
auto_model_from_atomized = Fedot(problem=problem, preset='best_quality', timeout=timeout,
logging_level=logging.FATAL,
initial_assumption=atomized_model)
auto_model_from_atomized.fit(features=deepcopy(train_data), target='target')
auto_model_from_atomized.predict_proba(features=deepcopy(test_data))
auto_model_from_atomized.current_pipeline.show()
print('auto_model_from_atomized', auto_model_from_atomized.get_metrics(deepcopy(test_data_target)))

auto_model_from_pipeline = Fedot(problem=problem, seed=42, preset='best_quality', timeout=timeout,
logging_level=logging.INFO,
auto_model_from_pipeline = Fedot(problem=problem, preset='best_quality', timeout=timeout,
logging_level=logging.FATAL,
initial_assumption=non_atomized_model)
auto_model_from_pipeline.fit(features=deepcopy(train_data), target='target')
auto_model_from_pipeline.predict_proba(features=deepcopy(test_data))
Expand All @@ -58,4 +59,6 @@ def run_additional_learning_example():


if __name__ == '__main__':
set_random_seed(42)

run_additional_learning_example()
19 changes: 9 additions & 10 deletions examples/advanced/decompose/classification_refinement_example.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
import random

import numpy as np
from golem.core.tuning.simultaneous import SimultaneousTuner

from cases.credit_scoring.credit_scoring_problem import get_scoring_data, calculate_validation_metric
Expand All @@ -10,9 +7,7 @@
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.repository.quality_metrics_repository import ClassificationMetricsEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum

random.seed(1)
np.random.seed(1)
from fedot.core.utils import set_random_seed


def get_refinement_pipeline():
Expand Down Expand Up @@ -66,11 +61,13 @@ def run_refinement_scoring_example(train_path, test_path, with_tuning=False):
display_roc_auc(decompose_c, test_dataset, 'With decomposition pipeline')

if with_tuning:
tuner = TunerBuilder(task) \
.with_tuner(SimultaneousTuner)\
.with_metric(ClassificationMetricsEnum.ROCAUC)\
.with_iterations(30) \
tuner = (
TunerBuilder(task)
.with_tuner(SimultaneousTuner)
.with_metric(ClassificationMetricsEnum.ROCAUC)
.with_iterations(30)
.build(train_dataset)
)
no_decompose_c = tuner.tune(no_decompose_c)
decompose_c = tuner.tune(decompose_c)

Expand All @@ -82,5 +79,7 @@ def run_refinement_scoring_example(train_path, test_path, with_tuning=False):


if __name__ == '__main__':
set_random_seed(1)

full_path_train, full_path_test = get_scoring_data()
run_refinement_scoring_example(full_path_train, full_path_test, with_tuning=True)
5 changes: 4 additions & 1 deletion examples/advanced/decompose/refinement_forecast_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
from fedot.core.pipelines.ts_wrappers import in_sample_ts_forecast
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import TaskTypesEnum, Task, TsForecastingParams
from fedot.core.utils import set_random_seed


warnings.filterwarnings('ignore')
np.random.seed(2020)


def get_refinement_pipeline_with_polyfit():
Expand Down Expand Up @@ -160,6 +161,8 @@ def run_refinement_forecast(path_to_file, len_forecast=100, lagged=150,


if __name__ == '__main__':
set_random_seed(2020)

path = '../../../cases/data/time_series/economic_data.csv'
run_refinement_forecast(path, len_forecast=50, validation_blocks=5,
lagged=50, vis_with_decompose=True)
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def run_with_random_search_composer():
composer_params = {'available_operations': ['class_decompose', 'rf', 'linear', 'xgboost', 'dt'],
'optimizer': RandomMutationSearchOptimizer}

automl = Fedot(problem='classification', timeout=1, logging_level=logging.DEBUG,
automl = Fedot(problem='classification', timeout=1, logging_level=logging.FATAL,
preset='fast_train', **composer_params)

automl.fit(train_data_path)
Expand Down
15 changes: 9 additions & 6 deletions examples/advanced/multimodal_text_num_example.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from pathlib import Path

from fedot.api.main import Fedot
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.data.multi_modal import MultiModalData
from fedot.core.utils import fedot_project_root
from fedot.core.utils import set_random_seed


def run_multi_modal_example(file_path: str, visualization=False, with_tuning=True) -> float:
def run_multi_modal_example(file_path: str, visualization: bool = False, with_tuning: bool = True,
timeout: float = 10.) -> float:
"""
Runs FEDOT on multimodal data from the `Wine Reviews dataset
<https://www.kaggle.com/datasets/zynicide/wine-reviews>`_.
Expand All @@ -18,16 +18,17 @@ def run_multi_modal_example(file_path: str, visualization=False, with_tuning=Tru
file_path: path to the file with multimodal data.
visualization: if True, then final pipeline will be visualised.
with_tuning: if True, then pipeline will be tuned.
timeout: overall time limit for fitting; passed to Fedot as its `timeout` argument.
Returns:
F1 metrics of the model.
"""
task = 'classification'
path = Path(fedot_project_root(), file_path)
path = fedot_project_root().joinpath(file_path)
data = MultiModalData.from_csv(file_path=path, task=task, target_columns='variety', index_col=None)
fit_data, predict_data = train_test_data_setup(data, shuffle_flag=True, split_ratio=0.7)

automl_model = Fedot(problem=task, timeout=10, with_tuning=with_tuning)
automl_model = Fedot(problem=task, timeout=timeout, with_tuning=with_tuning, n_jobs=1)
automl_model.fit(features=fit_data,
target=fit_data.target)

Expand All @@ -39,8 +40,10 @@ def run_multi_modal_example(file_path: str, visualization=False, with_tuning=Tru

print(f'F1 for validation sample is {round(metrics["f1"], 3)}')

return metrics["f1"]
return metrics['f1']


if __name__ == '__main__':
set_random_seed(42)

run_multi_modal_example(file_path='examples/data/multimodal_wine.csv', visualization=True)
7 changes: 5 additions & 2 deletions examples/advanced/multiobj_optimisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from fedot.api.main import Fedot
from fedot.core.utils import fedot_project_root
from fedot.core.utils import set_random_seed


def run_classification_multiobj_example(visualization=False, timeout=1, with_tuning=True):
Expand All @@ -11,8 +12,8 @@ def run_classification_multiobj_example(visualization=False, timeout=1, with_tun
del test_data['class']
problem = 'classification'

metric_names = ['f1', 'node_num']
auto_model = Fedot(problem=problem, timeout=timeout, preset='best_quality', seed=42,
metric_names = ['f1', 'node_number']
auto_model = Fedot(problem=problem, timeout=timeout, preset='best_quality',
metric=metric_names,
with_tuning=with_tuning)
auto_model.fit(features=train_data, target='class')
Expand All @@ -27,4 +28,6 @@ def run_classification_multiobj_example(visualization=False, timeout=1, with_tun


if __name__ == '__main__':
set_random_seed(42)

run_classification_multiobj_example(visualization=True)
2 changes: 1 addition & 1 deletion examples/advanced/parallelization_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def run_experiments(timeout: float = None, partitions_n=10, n_jobs=-1):
train_data_tmp = train_data.iloc[:partition].copy()
start_time = timeit.default_timer()
auto_model = Fedot(problem=problem, seed=42, timeout=timeout,
n_jobs=_n_jobs, logging_level=logging.NOTSET,
n_jobs=_n_jobs, logging_level=logging.FATAL,
with_tuning=False, preset='fast_train')
auto_model.fit(features=train_data_tmp, target='target')
times[_n_jobs].append((timeit.default_timer() - start_time) / 60)
Expand Down
6 changes: 2 additions & 4 deletions examples/advanced/profiler_example.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
import os
import random

import numpy as np
from golem.utilities.profiler.memory_profiler import MemoryProfiler
from golem.utilities.profiler.time_profiler import TimeProfiler

from cases.credit_scoring.credit_scoring_problem import run_credit_scoring_problem, get_scoring_data
from fedot.core.utils import set_random_seed

random.seed(1)
np.random.seed(1)

if __name__ == '__main__':
set_random_seed(1)
# JUST UNCOMMENT THE TYPE OF PROFILER YOU NEED
# EXAMPLE of MemoryProfiler.

Expand Down
Loading

0 comments on commit 89ff552

Please sign in to comment.