Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix some bugs #1174

Merged
merged 27 commits into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -113,22 +113,14 @@ def _check_and_correct_window_size(self, time_series: np.array, forecast_length:
Returns:

"""
prefix = "Warning: window size of lagged transformation was changed"

# Maximum threshold
removing_len = self.window_size + forecast_length
if removing_len > len(time_series):
previous_size = self.window_size
# We need at least 10 objects for training, so subtract 10
window_size = len(time_series) - forecast_length - 10
self.params.update(window_size=window_size)
self.log.info(f"{prefix} from {previous_size} to {self.window_size}.")
if self.window_size + forecast_length > len(time_series):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Что предполагается делать в случае, когда длина окна больше допустимой для конкретного ряда?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Кидать ошибку. Это некорректный случай.
Идея в том, чтобы убрать скрытое поведение. Если тюнер или композер хотят поставить некорректное значение окна, то пусть они узнают об этом.

raise ValueError(f"Window size is to high ({self.window_size}) for provided data len {len(time_series)}")

# Minimum threshold
if self.window_size < self.window_size_minimum:
previous_size = self.window_size
self.params.update(window_size=self.window_size_minimum)
self.log.info(f"{prefix} from {previous_size} to {self.window_size}")
raise ValueError(f"Window size is to low {self.window_size}. It should be greater")

def _update_column_types(self, output_data: OutputData):
"""Update column types after lagged transformation. All features becomes ``float``
Expand Down Expand Up @@ -344,7 +336,7 @@ class SparseLaggedTransformationImplementation(LaggedImplementation):
def __init__(self, params: Optional[OperationParameters]):
super().__init__(params)
self.sparse_transform = True
self.window_size_minimum = 6
self.window_size_minimum = 4
valer1435 marked this conversation as resolved.
Show resolved Hide resolved


class LaggedTransformationImplementation(LaggedImplementation):
Expand Down Expand Up @@ -735,8 +727,8 @@ def ts_to_table(idx, time_series: np.array, window_size: int, is_lag: bool = Fal
``updated_idx`` -> clipped indices of time series\n
``features_columns`` -> lagged time series feature table
"""
_temp = [time_series[i:-(window_size - i - 1)] for i in range(window_size - 1)] + [time_series[window_size - 1:]]
features_columns = np.array(_temp).T
features_columns = np.array([time_series[i:window_size + i]
valer1435 marked this conversation as resolved.
Show resolved Hide resolved
for i in range(time_series.shape[0] - window_size + 1)])

if is_lag:
updated_idx = np.concatenate([idx[window_size:], idx[-1:]])
Expand Down Expand Up @@ -834,17 +826,16 @@ def prepare_target(all_idx, idx, features_columns: np.array, target, forecast_le

# Multi-target transformation
if forecast_length > 1:
_temp = ([ts_target[i:-(forecast_length - i - 1)] for i in range(forecast_length - 1)] +
[ts_target[forecast_length - 1:]])
updated_target = np.array(_temp).T
updated_target = np.array([ts_target[i:forecast_length + i]
for i in range(ts_target.shape[0] - forecast_length + 1)])
valer1435 marked this conversation as resolved.
Show resolved Hide resolved

updated_idx = idx[: -forecast_length + 1]
updated_features = features_columns[: -forecast_length]
else:
# Forecast horizon equals to 1
updated_idx = idx
updated_features = features_columns[: -1]
updated_target = ts_target
updated_target = np.reshape(ts_target, (-1, 1))

return updated_idx, updated_features, updated_target

Expand Down
8 changes: 3 additions & 5 deletions fedot/core/pipelines/prediction_intervals/params.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from dataclasses import dataclass
from dataclasses import field

from fedot.core.repository.operation_types_repository import get_operations_for_task
from fedot.core.repository.tasks import Task, TaskTypesEnum
from golem.core.tuning.simultaneous import SimultaneousTuner


Expand Down Expand Up @@ -46,11 +48,7 @@ class PredictionIntervalsParams:
# thus are removed.
# In the future this should be solved...
mutations_operations: List[str] = field(default_factory=lambda:
['lagged', 'glm', 'ridge', 'sparse_lagged', 'lasso', 'ts_naive_average',
'locf', 'pca', 'linear', 'smoothing', 'adareg', 'dtreg', 'gbr', 'lgbmreg',
'rfr', 'polyfit', 'sgdr', 'ets', 'svr', 'treg', 'fast_ica',
'poly_features', 'ransac_lin_reg', 'ransac_non_lin_reg', 'cut',
'isolation_forest_reg', 'gaussian_filter', 'diff_filter', 'exog_ts'])
get_operations_for_task(task=Task(task_type=TaskTypesEnum.ts_forecasting)))

ql_number_models: Union[int, str] = 10
ql_low_tuner: Optional[SimultaneousTuner] = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,12 @@ def solver_mutation_of_best_pipeline(train_input: InputData,
s += 1
pipeline.show()
start_time = time.time()
pipeline.fit(train_input)
try:
# TODO: create new approach to mutation generation:
# mutate and fit in one try in get_mutations/get_different_mutations
pipeline.fit(train_input)
except:
continue
pred = out_of_sample_ts_forecast(pipeline=pipeline, input_data=train_input, horizon=horizon)
metric_value = RMSE.get_value(pipeline=pipeline, reference_data=train_input, validation_blocks=2)
if show_progress:
Expand All @@ -98,7 +103,7 @@ def solver_mutation_of_best_pipeline(train_input: InputData,
if discard_inapropriate_pipelines:
predictions = []
maximal_metric_value = np.quantile(np.array(metric_values), keep_percentage)
for i, m in enumerate(mutations_of_best_pipeline):
for i in range(len(first_pred_constraints)):
if first_pred_constraints[i] and deviance_pred_constraints[i] and metric_values[i] < maximal_metric_value:
predictions.append(raw_predictions[i])
else:
Expand Down
21 changes: 9 additions & 12 deletions fedot/core/pipelines/prediction_intervals/ts_mutation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import numpy as np
from typing import List

from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters
Expand Down Expand Up @@ -35,7 +34,6 @@ def get_ts_mutation(individual: Individual, operations: List[str]):
task=Task(task_type))

mutation = Mutation(parameters, requirements, graph_params)

return mutation._mutation(individual)[0]


Expand All @@ -51,8 +49,7 @@ def get_mutations(individual: Individual, number_mutations: int, operations: Lis
list of mutations of given individual. Mutations can be identical.
"""
mutations = [get_ts_mutation(individual, operations) for _ in range(number_mutations)]

return mutations
return [x for x in mutations if x is not None]


def get_different_mutations(individual: Individual,
Expand All @@ -69,19 +66,19 @@ def get_different_mutations(individual: Individual,
Returns:
list of mutations of given individual. Mutations must be different.
"""
mutations = []
graph_list = []
s = 1
mutations, graph_list = [], []
maximal_number_iterations = number_mutations * 3

while (len(mutations) < number_mutations and s <= maximal_number_iterations):
s += 1
for _ in range(maximal_number_iterations):
new_ind = get_ts_mutation(individual, operations)
if np.array([get_distance_between(new_ind.graph, x, compare_node_params=False) > 0 for x in graph_list]).all():
if new_ind is not None and all(get_distance_between(graph_1=new_ind.graph,
graph_2=x,
compare_node_params=False) for x in graph_list):
graph_list.append(new_ind.graph)
mutations.append(new_ind)
if len(mutations) == number_mutations:
break

if s == maximal_number_iterations + 1:
if len(mutations) != number_mutations:
logger.warning(f"Maximal number attempts {maximal_number_iterations} to build different mutations used.")
else:
logger.info(f"{number_mutations} different mutations are succesfully created.")
Expand Down
7 changes: 0 additions & 7 deletions fedot/core/pipelines/prediction_intervals/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
from fedot.core.pipelines.pipeline import Pipeline
from fedot.api.main import Fedot
from fedot.core.data.data import InputData
from fedot.core.repository.tasks import Task, TaskTypesEnum
from fedot.core.repository.operation_types_repository import get_operations_for_task
from golem.core.optimisers.opt_history_objects.individual import Individual

from fedot.core.pipelines.prediction_intervals.params import PredictionIntervalsParams
Expand Down Expand Up @@ -129,11 +127,6 @@ def check_init_params(model: Fedot,

if not isinstance(params.mutations_operations, list):
raise ValueError('Argument mutations_operations must be list of strings.')
else:
all_possible_operations = get_operations_for_task(task=Task(task_type=TaskTypesEnum.ts_forecasting))
for x in params.mutations_operations:
if x not in all_possible_operations:
raise ValueError(f"Incorrect mutation '{x}' given in mutations_operations.")

if params.ql_number_models != 'max':
if not isinstance(params.ql_number_models, int) or params.ql_number_models < 1:
Expand Down
6 changes: 4 additions & 2 deletions fedot/core/repository/data/data_operation_repository.json
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,8 @@
"ransac_non_lin_reg": {
"meta": "regression_preprocessing",
"presets": ["fast_train", "*tree"],
"tags": ["affects_target", "non_linear", "filtering", "correct_params", "non_applicable_for_ts"]
"tags": ["affects_target", "non_linear", "filtering",
"correct_params", "non_applicable_for_ts", "non-default"]
},
"isolation_forest_reg": {
"meta": "regression_preprocessing",
Expand Down Expand Up @@ -309,7 +310,8 @@
"cutting",
"correct_params",
"non_lagged",
"ts_to_ts"
"ts_to_ts",
"non-default"
],
"input_type": "[DataTypesEnum.multi_ts, DataTypesEnum.ts]"
},
Expand Down
9 changes: 6 additions & 3 deletions fedot/core/repository/data/default_operation_params.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
"xgboost": {
"eval_metric": "mlogloss",
"nthread": 1,
"n_jobs": 1
"n_jobs": 1,
"verbose": 0
},
"catboost": {
"allow_writing_files": false,
Expand All @@ -27,7 +28,8 @@
"subsample_freq": 10,
"learning_rate": 0.03,
"n_estimators": 100,
"n_jobs": 1
"n_jobs": 1,
"verbose": -1
},
"lgbmreg": {
"num_leaves": 32,
Expand All @@ -36,7 +38,8 @@
"subsample_freq": 10,
"learning_rate": 0.03,
"n_estimators": 100,
"n_jobs": 1
"n_jobs": 1,
"verbose": -1
},
"lagged": {
"window_size": 10
Expand Down
9 changes: 6 additions & 3 deletions fedot/core/repository/data/model_repository.json
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,8 @@
"meta": "ts_model",
"presets": ["ts"],
"tags": [
"non_linear"
"non_linear",
"non-default"
]
},
"bernb": {
Expand Down Expand Up @@ -209,7 +210,8 @@
"tags": [
"boosting",
"non_multi",
"non_linear"
"non_linear",
"non-default"
]
},
"kmeans": {
Expand Down Expand Up @@ -423,7 +425,8 @@
"presets": ["*tree"],
"tags": [
"tree",
"non_linear"
"non_linear",
"non-default"
]
},
"xgboost": {
Expand Down
11 changes: 9 additions & 2 deletions fedot/utilities/ts_gapfilling.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,11 +217,11 @@ def _fill_first_and_last_gaps(self, input_data: np.array, output_data: np.array)
non_nan = output_data[non_nan_ids]
if np.isclose(input_data[0], self.gap_value):
# First element is a gap - replace with first known value
self.log.info(f'First element in the array were replaced by first known value')
self.log.info('First element in the array were replaced by first known value')
output_data[0] = non_nan[0]
if np.isclose(input_data[-1], self.gap_value):
# Last element is a gap - replace with last known value
self.log.info(f'Last element in the array were replaced by last known value')
self.log.info('Last element in the array were replaced by last known value')
output_data[-1] = non_nan[-1]

return output_data
Expand Down Expand Up @@ -430,6 +430,13 @@ def __pipeline_fit_predict(self, pipeline, timeseries_train: np.array, len_gap:
task=task,
data_type=DataTypesEnum.ts)

forecast_length = input_data.task.task_params.forecast_length
data_length = input_data.features.shape[0]
for node in pipeline_for_forecast.nodes:
if node.name == 'lagged':
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Мне кажется, что это должно учитываться где-то внутри. Случайный пользователь не додумается до такого.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Так это и есть внутри. Разве нет?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Сейчас получается так, что, если у lagged преобразования длина окна слишком большая для конкретного ряда, то вываливается ошибка, которая нигде не обрабатывается. Это код только для замены пропусков, длина окна будет подбираться только для этой задачи, а в общем случае будет ошибка. То есть придется в любой код, в котором мы не хотим, чтобы вываливалась ошибка, писать подобные условия

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Все правильно, разве нет?
Если длина окна задана некорректно, то мы получаем ошибку. Это как с пайплайном. Сейчас некорректные начальные пайплайны вываливают ошибку, а не сами как-то корректируются.

if node.parameters['window_size'] + forecast_length >= data_length:
node.parameters = {'window_size': data_length - forecast_length - 1}
valer1435 marked this conversation as resolved.
Show resolved Hide resolved

# Making predictions for the missing part in the time series
pipeline_for_forecast.fit_from_scratch(input_data)

Expand Down
Loading
Loading