Skip to content

Commit cd5db13

Browse files
committed
refactorings and renamings for unifying various generators
1 parent 8c24ef0 commit cd5db13

17 files changed

+30
-116
lines changed

common/feature_generation.py common/gen_features.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
import scipy.stats as stats
1313

1414
from common.utils import *
15-
from common.feature_generation_rolling_agg import *
16-
from common.feature_generation_rolling_agg import _aggregate_last_rows
15+
from common.gen_features_rolling_agg import *
16+
from common.gen_features_rolling_agg import _aggregate_last_rows
1717

1818
"""
1919
Feature generators.
File renamed without changes.

common/label_generation_highlow.py common/gen_labels_highlow.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
import pandas as pd
88

99
from common.utils import *
10-
from common.feature_generation import *
11-
from common.feature_generation_rolling_agg import *
10+
from common.gen_features import *
11+
from common.gen_features_rolling_agg import *
1212

1313
"""
1414
Label generation. Labels are features which are used for training.
File renamed without changes.
File renamed without changes.

common/utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
from binance.helpers import date_to_milliseconds, interval_to_milliseconds
1212

13-
from common.feature_generation import *
13+
from common.gen_features import *
1414

1515
#
1616
# Decimals

scripts/depth_to_features.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import numpy as np
1212

1313
from common.utils import *
14-
from common.feature_generation import *
14+
from common.gen_features import *
1515
from common.depth_processing import *
1616

1717
"""

scripts/features.py

+2-93
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,8 @@
66
import pandas as pd
77

88
from service.App import *
9-
from common.feature_generation import *
10-
from common.label_generation_highlow import generate_labels_highlow, generate_labels_highlow2
11-
from common.label_generation_topbot import generate_labels_topbot, generate_labels_topbot2
12-
from common.signal_generation import (
13-
generate_smoothen_scores, generate_combine_scores,
14-
generate_threshold_rule, generate_threshold_rule2
15-
)
9+
from common.generators import generate_feature_set
10+
1611

1712
#
1813
# Parameters
@@ -103,91 +98,5 @@ def main(config_file):
10398
print(f"Output file location: {out_path}")
10499

105100

106-
def generate_feature_set(df: pd.DataFrame, fs: dict, last_rows: int) -> Tuple[pd.DataFrame, list]:
107-
"""
108-
Apply the specified resolved feature generator to the input data set.
109-
"""
110-
111-
#
112-
# Select columns from the data set to be processed by the feature generator
113-
#
114-
cp = fs.get("column_prefix")
115-
if cp:
116-
cp = cp + "_"
117-
f_cols = [col for col in df if col.startswith(cp)]
118-
f_df = df[f_cols] # Alternatively: f_df = df.loc[:, df.columns.str.startswith(cp)]
119-
# Remove the prefix because feature generators are generic (a prefix will then be added to the derived features before joining them back to the main frame)
120-
f_df = f_df.rename(columns=lambda x: x[len(cp):] if x.startswith(cp) else x) # Alternatively: f_df.columns = f_df.columns.str.replace(cp, "")
121-
else:
122-
f_df = df[df.columns.to_list()] # We want a separate data frame object to which derived features are added before joining them back to the main frame with a prefix
123-
124-
#
125-
# Resolve and apply feature generator functions from the configuration
126-
#
127-
generator = fs.get("generator")
128-
gen_config = fs.get('config', {})
129-
if generator == "itblib":
130-
features = generate_features_itblib(f_df, gen_config, last_rows=last_rows)
131-
elif generator == "depth":
132-
features = generate_features_depth(f_df)
133-
elif generator == "tsfresh":
134-
features = generate_features_tsfresh(f_df, gen_config, last_rows=last_rows)
135-
elif generator == "talib":
136-
features = generate_features_talib(f_df, gen_config, last_rows=last_rows)
137-
elif generator == "itbstats":
138-
features = generate_features_itbstats(f_df, gen_config, last_rows=last_rows)
139-
140-
# Labels
141-
elif generator == "highlow":
142-
horizon = gen_config.get("horizon")
143-
144-
# Binary labels whether max has exceeded a threshold or not
145-
print(f"Generating 'highlow' labels with horizon {horizon}...")
146-
features = generate_labels_highlow(f_df, horizon=horizon)
147-
148-
print(f"Finished generating 'highlow' labels. {len(features)} labels generated.")
149-
elif generator == "highlow2":
150-
print(f"Generating 'highlow2' labels...")
151-
f_df, features = generate_labels_highlow2(f_df, gen_config)
152-
print(f"Finished generating 'highlow2' labels. {len(features)} labels generated.")
153-
elif generator == "topbot":
154-
column_name = gen_config.get("columns", "close")
155-
156-
top_level_fracs = [0.01, 0.02, 0.03, 0.04, 0.05]
157-
bot_level_fracs = [-x for x in top_level_fracs]
158-
159-
f_df, features = generate_labels_topbot(f_df, column_name, top_level_fracs, bot_level_fracs)
160-
elif generator == "topbot2":
161-
f_df, features = generate_labels_topbot2(f_df, gen_config)
162-
163-
# Signals
164-
elif generator == "smoothen":
165-
f_df, features = generate_smoothen_scores(f_df, gen_config)
166-
elif generator == "combine":
167-
f_df, features = generate_combine_scores(f_df, gen_config)
168-
elif generator == "threshold_rule":
169-
f_df, features = generate_threshold_rule(f_df, gen_config)
170-
elif generator == "threshold_rule2":
171-
f_df, features = generate_threshold_rule2(f_df, gen_config)
172-
173-
else:
174-
print(f"Unknown feature generator {generator}")
175-
return
176-
177-
#
178-
# Add generated features to the main data frame with all other columns and features
179-
#
180-
f_df = f_df[features]
181-
fp = fs.get("feature_prefix")
182-
if fp:
183-
f_df = f_df.add_prefix(fp + "_")
184-
185-
new_features = f_df.columns.to_list()
186-
187-
df = df.join(f_df) # Attach all derived features to the main frame
188-
189-
return df, new_features
190-
191-
192101
if __name__ == '__main__':
193102
main()

scripts/labels.py

-2
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55

66
from service.App import *
77
from scripts.features import generate_feature_set
8-
from common.label_generation_highlow import *
9-
from common.label_generation_topbot import *
108

119
"""
1210
This script will load a feature file (or any file with close price), and add

scripts/predict.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
import pandas as pd
88

99
from service.App import *
10+
from common.gen_features import *
1011
from common.classifiers import *
11-
from common.feature_generation import *
1212
from common.model_store import *
1313

1414
"""

scripts/predict_rolling.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88

99
from service.App import *
1010
from common.utils import *
11+
from common.gen_features import *
1112
from common.classifiers import *
12-
from common.feature_generation import *
1313
from common.model_store import *
1414

1515
"""

scripts/signals.py

+14-5
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,11 @@
99
from sklearn.model_selection import ParameterGrid
1010

1111
from service.App import *
12-
from common.label_generation_topbot import *
13-
from common.signal_generation import *
12+
from common.gen_signals import *
1413

1514
"""
16-
Use predictions to process scores, generate signals and simulate trades over the whole period.
17-
The results of the trade simulation with signals and performances is stored in the output file.
18-
The results can be used to further analyze (also visually) the selected signal and trade strategy.
15+
Generate new derived columns according to the signal definitions.
16+
The transformations are applied to the results of ML predictions.
1917
"""
2018

2119
class P:
@@ -74,6 +72,11 @@ def main(config_file):
7472
# Maximum possible on labels themselves
7573
#performance_long, performance_short, long_count, short_count, long_profitable, short_profitable, longs, shorts = performance_score(df, 'top10_2', 'bot10_2', 'close')
7674

75+
76+
77+
78+
79+
7780
#
7881
# Aggregate and post-process
7982
#
@@ -115,6 +118,12 @@ def main(config_file):
115118
else: # Default one dim rule
116119
apply_rule_with_score_thresholds(df, score_column_names, trade_model)
117120

121+
122+
123+
124+
125+
126+
118127
#
119128
# Simulate trade and compute performance using close price and two boolean signals
120129
# Add a pair of two dicts: performance dict and model parameters dict

scripts/train.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
import pandas as pd
88

99
from service.App import *
10+
from common.gen_features import *
1011
from common.classifiers import *
11-
from common.feature_generation import *
1212
from common.model_store import *
1313

1414
"""

scripts/train_signals.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,8 @@
1010

1111
from service.App import *
1212
from common.utils import *
13+
from common.gen_signals import *
1314
from common.classifiers import *
14-
from common.label_generation_topbot import *
15-
from common.signal_generation import *
1615

1716
"""
1817
Input data:

service/analyzer.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,8 @@
1010

1111
from service.App import *
1212
from common.utils import *
13+
from common.generators import generate_feature_set
1314
from common.classifiers import *
14-
from common.feature_generation import *
15-
from common.signal_generation import *
1615
from common.model_store import *
1716

1817
from scripts.merge import *

tests/test_label_generation.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
import numpy.testing as npt
33

44
from common.utils import *
5-
from common.signal_generation import *
6-
from common.label_generation_topbot import *
5+
from common.gen_signals import *
6+
from common.gen_labels_topbot import *
77

88

99
def test_extremum_labels():

tests/test_utils.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from common.utils import *
55
from common.utils import add_area_ratio
6-
from common.signal_generation import *
6+
from common.gen_signals import *
77

88

99
def test_decimal():

0 commit comments

Comments
 (0)