Skip to content

Commit d095ff9

Browse files
committedDec 17, 2024
add possibility of regression models for nn algorithm
1 parent 66837d5 commit d095ff9

File tree

4 files changed

+102
-34
lines changed

4 files changed

+102
-34
lines changed
 

‎common/classifiers.py

+93-24
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@
77
from sklearn import metrics
88
from sklearn.model_selection import ParameterGrid
99
from sklearn.preprocessing import StandardScaler
10-
from sklearn.metrics import precision_score
11-
from sklearn.metrics import recall_score
12-
from sklearn.metrics import f1_score
1310

1411
from sklearn.linear_model import LogisticRegression, SGDClassifier
1512
from sklearn.svm import SVC, SVR
@@ -41,6 +38,9 @@ def train_gb(df_X, df_y, model_config: dict):
4138
"""
4239
Train model with the specified hyper-parameters and return this model (and scaler if any).
4340
"""
41+
is_scale = model_config.get("train", {}).get("is_scale", False)
42+
is_regression = model_config.get("train", {}).get("is_regression", False)
43+
4444
#
4545
# Double column set if required
4646
#
@@ -54,7 +54,6 @@ def train_gb(df_X, df_y, model_config: dict):
5454
#
5555
# Scale
5656
#
57-
is_scale = model_config.get("train", {}).get("is_scale", False)
5857
if is_scale:
5958
scaler = StandardScaler()
6059
scaler.fit(df_X)
@@ -176,6 +175,9 @@ def train_nn(df_X, df_y, model_config: dict):
176175
"""
177176
Train model with the specified hyper-parameters and return this model (and scaler if any).
178177
"""
178+
is_scale = model_config.get("train", {}).get("is_scale", True)
179+
is_regression = model_config.get("train", {}).get("is_regression", False)
180+
179181
#
180182
# Double column set if required
181183
#
@@ -189,7 +191,6 @@ def train_nn(df_X, df_y, model_config: dict):
189191
#
190192
# Scale
191193
#
192-
is_scale = model_config.get("train", {}).get("is_scale", True)
193194
if is_scale:
194195
scaler = StandardScaler()
195196
scaler.fit(df_X)
@@ -227,19 +228,31 @@ def train_nn(df_X, df_y, model_config: dict):
227228
model.add(Dense(out_features, activation='sigmoid', input_dim=in_features)) # , kernel_regularizer=l2(reg_l2)
228229
#model.add(Dropout(rate=0.5))
229230

230-
model.add(Dense(1, activation='sigmoid'))
231-
232-
# Compile model
233-
optimizer = Adam(learning_rate=learning_rate)
234-
model.compile(
235-
loss='binary_crossentropy',
236-
optimizer=optimizer,
237-
metrics=[
238-
tf.keras.metrics.AUC(name="auc"),
239-
tf.keras.metrics.Precision(name="precision"),
240-
tf.keras.metrics.Recall(name="recall"),
241-
],
242-
)
231+
if is_regression:
232+
model.add(Dense(units=1))
233+
234+
model.compile(
235+
loss='mse',
236+
optimizer=Adam(learning_rate=learning_rate),
237+
metrics=[
238+
tf.keras.metrics.MeanAbsoluteError(name="mean_absolute_error"),
239+
tf.keras.metrics.MeanAbsolutePercentageError(name="mean_absolute_percentage_error"),
240+
tf.keras.metrics.R2Score(name="r2_score"),
241+
],
242+
)
243+
else:
244+
model.add(Dense(units=1, activation='sigmoid'))
245+
246+
model.compile(
247+
loss='binary_crossentropy',
248+
optimizer=Adam(learning_rate=learning_rate),
249+
metrics=[
250+
tf.keras.metrics.AUC(name="auc"),
251+
tf.keras.metrics.Precision(name="precision"),
252+
tf.keras.metrics.Recall(name="recall"),
253+
],
254+
)
255+
243256
#model.summary()
244257

245258
es = EarlyStopping(
@@ -328,6 +341,9 @@ def train_lc(df_X, df_y, model_config: dict):
328341
"""
329342
Train model with the specified hyper-parameters and return this model (and scaler if any).
330343
"""
344+
is_scale = model_config.get("train", {}).get("is_scale", True)
345+
is_regression = model_config.get("train", {}).get("is_regression", False)
346+
331347
#
332348
# Double column set if required
333349
#
@@ -341,7 +357,6 @@ def train_lc(df_X, df_y, model_config: dict):
341357
#
342358
# Scale
343359
#
344-
is_scale = model_config.get("train", {}).get("is_scale", True)
345360
if is_scale:
346361
scaler = StandardScaler()
347362
scaler.fit(df_X)
@@ -526,11 +541,65 @@ def compute_scores(y_true, y_hat):
526541
recall = metrics.recall_score(y_true, y_hat_class)
527542

528543
scores = dict(
529-
auc=auc,
530-
ap=ap, # it summarizes precision-recall curve, should be equivalent to auc
531-
f1=f1,
532-
precision=precision,
533-
recall=recall,
544+
auc=round(auc, 3),
545+
ap=round(ap, 3),
546+
f1=round(f1, 3),
547+
precision=round(precision, 3),
548+
recall=round(recall, 3),
549+
)
550+
551+
return scores
552+
553+
554+
def compute_scores_regression(y_true, y_hat):
555+
"""Compute regression scores. Input columns must have numeric data type."""
556+
557+
try:
558+
mae = metrics.mean_absolute_error(y_true, y_hat)
559+
except ValueError:
560+
mae = np.nan
561+
562+
try:
563+
mape = metrics.mean_absolute_percentage_error(y_true, y_hat)
564+
except ValueError:
565+
mape = np.nan
566+
567+
try:
568+
r2 = metrics.r2_score(y_true, y_hat)
569+
except ValueError:
570+
r2 = np.nan
571+
572+
#
573+
# How good it is in predicting the sign (increase or decrease)
574+
#
575+
576+
y_true_class = np.where(y_true.values > 0.0, +1, -1)
577+
y_hat_class = np.where(y_hat.values > 0.0, +1, -1)
578+
579+
try:
580+
auc = metrics.roc_auc_score(y_true_class, y_hat_class)
581+
except ValueError:
582+
auc = 0.0 # Only one class is present (if dataset is too small, e.g., when debugging) or Nulls in predictions
583+
584+
try:
585+
ap = metrics.average_precision_score(y_true_class, y_hat_class)
586+
except ValueError:
587+
ap = 0.0 # Only one class is present (if dataset is too small, e.g., when debugging) or Nulls in predictions
588+
589+
f1 = metrics.f1_score(y_true_class, y_hat_class)
590+
precision = metrics.precision_score(y_true_class, y_hat_class)
591+
recall = metrics.recall_score(y_true_class, y_hat_class)
592+
593+
scores = dict(
594+
mae=round(mae, 3),
595+
mape=round(mape, 3),
596+
r2=round(r2, 3),
597+
598+
auc=round(auc, 3),
599+
ap=round(ap, 3),
600+
f1=round(f1, 3),
601+
precision=round(precision, 3),
602+
recall=round(recall, 3),
534603
)
535604

536605
return scores

‎common/generators.py

+5-6
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import numpy as np
44
import pandas as pd
5+
from pandas.api.types import is_float_dtype, is_numeric_dtype, is_integer_dtype, is_string_dtype
56

67
from common.classifiers import *
78
from common.model_store import *
@@ -162,9 +163,8 @@ def predict_feature_set(df, fs, config, models: dict):
162163
# For each new score, compare it with the label true values
163164
if label in df:
164165
df_y = df[label]
165-
if df_y.dtype == "float64" and df_y_hat.dtype == "float64":
166-
# TODO Regression scores
167-
scores[score_column_name] = dict(rmse=0.0, mae=0.0, mse=0.0, mape=0.0, r2=0.0)
166+
if is_float_dtype(df_y) and is_float_dtype(df_y_hat):
167+
scores[score_column_name] = compute_scores_regression(df_y, df_y_hat) # Regression scores
168168
else:
169169
scores[score_column_name] = compute_scores(df_y, df_y_hat) # Classification scores
170170

@@ -233,9 +233,8 @@ def train_feature_set(df, fs, config):
233233

234234
out_df[score_column_name] = df_y_hat
235235

236-
if df_y.dtype == "float64" and df_y_hat.dtype == "float64":
237-
# TODO Regression scores
238-
scores[score_column_name] = dict(rmse=0.0, mae=0.0, mse=0.0, mape=0.0, r2=0.0)
236+
if is_float_dtype(df_y) and is_float_dtype(df_y_hat):
237+
scores[score_column_name] = compute_scores_regression(df_y, df_y_hat) # Regression scores
239238
else:
240239
scores[score_column_name] = compute_scores(df_y, df_y_hat) # Classification scores
241240

‎scripts/predict_rolling.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
import numpy as np
77
import pandas as pd
8+
from pandas.api.types import is_float_dtype, is_numeric_dtype, is_integer_dtype, is_string_dtype
89

910
from service.App import *
1011
from common.utils import *
@@ -306,9 +307,8 @@ def main(config_file):
306307

307308
print(f"Using {len(df_scores)} non-nan rows for scoring.")
308309

309-
if y_true.dtype == "float64" and y_predicted.dtype == "float64":
310-
# TODO Regression scores
311-
score = dict(rmse=0.0, mae=0.0, mse=0.0, mape=0.0, r2=0.0)
310+
if is_float_dtype(y_true) and is_float_dtype(y_predicted):
311+
score = compute_scores_regression(y_true, y_predicted) # Regression scores
312312
else:
313313
score = compute_scores(y_true, y_predicted) # Classification scores
314314

‎scripts/train.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def main(config_file):
8989
df = df.head(-label_horizon)
9090

9191
df.replace([np.inf, -np.inf], np.nan, inplace=True)
92-
#df = df.dropna(subset=labels)
92+
df = df.dropna(subset=labels)
9393
df = df.dropna(subset=train_features)
9494
if len(df) == 0:
9595
print(f"ERROR: Empty data set after removing NULLs in feature columns. Some features might have all NULL values.")

0 commit comments

Comments
 (0)
Please sign in to comment.