|
6 | 6 | import pandas as pd
|
7 | 7 |
|
8 | 8 | from service.App import *
|
9 |
| -from common.feature_generation import * |
10 |
| -from common.label_generation_highlow import generate_labels_highlow, generate_labels_highlow2 |
11 |
| -from common.label_generation_topbot import generate_labels_topbot, generate_labels_topbot2 |
12 |
| -from common.signal_generation import ( |
13 |
| - generate_smoothen_scores, generate_combine_scores, |
14 |
| - generate_threshold_rule, generate_threshold_rule2 |
15 |
| -) |
| 9 | +from common.generators import generate_feature_set |
| 10 | + |
16 | 11 |
|
17 | 12 | #
|
18 | 13 | # Parameters
|
@@ -103,91 +98,5 @@ def main(config_file):
|
103 | 98 | print(f"Output file location: {out_path}")
|
104 | 99 |
|
105 | 100 |
|
def _select_generator_input(df: pd.DataFrame, column_prefix) -> pd.DataFrame:
    """
    Return a separate data frame with the columns a generator should process.

    If `column_prefix` is set, only columns named `<column_prefix>_*` are selected and the
    prefix is stripped from their names, because the generators are generic and work with
    un-prefixed column names. Otherwise all columns are taken. In both cases the result is
    a different data frame object, so derived features can be added to it first and joined
    back to the main frame (with a prefix) afterwards.
    """
    if not column_prefix:
        return df[df.columns.to_list()]

    cp = column_prefix + "_"
    f_cols = [col for col in df if col.startswith(cp)]
    # Every selected column starts with the prefix, so it can be cut off unconditionally
    return df[f_cols].rename(columns=lambda name: name[len(cp):])


def generate_feature_set(df: pd.DataFrame, fs: dict, last_rows: int) -> Tuple[pd.DataFrame, list]:
    """
    Apply the specified resolved feature generator to the input data set.

    :param df: Main data frame with source columns. It is not modified; a joined frame is returned.
    :param fs: Feature set description. The following keys are read:
        "column_prefix" (optional) - process only columns with this prefix (see above helper),
        "generator" - name of the generator to apply,
        "config" (optional, default {}) - generator-specific configuration,
        "feature_prefix" (optional) - prefix added to the names of the generated columns.
    :param last_rows: Passed through to the "itblib", "tsfresh", "talib" and "itbstats"
        generators only; the other generators do not receive it.
    :return: Tuple of the main frame joined with the generated columns and the list of
        the names of these new columns (with the feature prefix applied).
    :raises ValueError: If the generator name is not one of the supported names. Previously
        this case printed a message and returned None, which contradicted the declared
        return type and made callers fail while unpacking the result.
    """

    #
    # Select columns from the data set to be processed by the feature generator
    #
    f_df = _select_generator_input(df, fs.get("column_prefix"))

    #
    # Resolve and apply feature generator functions from the configuration
    #
    generator = fs.get("generator")
    gen_config = fs.get('config', {})

    # Features
    # NOTE(review): these generators return only the list of feature names, so they
    # presumably add the derived columns to f_df in place - confirm in common modules
    if generator == "itblib":
        features = generate_features_itblib(f_df, gen_config, last_rows=last_rows)
    elif generator == "depth":
        features = generate_features_depth(f_df)
    elif generator == "tsfresh":
        features = generate_features_tsfresh(f_df, gen_config, last_rows=last_rows)
    elif generator == "talib":
        features = generate_features_talib(f_df, gen_config, last_rows=last_rows)
    elif generator == "itbstats":
        features = generate_features_itbstats(f_df, gen_config, last_rows=last_rows)

    # Labels
    elif generator == "highlow":
        horizon = gen_config.get("horizon")

        # Binary labels whether max has exceeded a threshold or not
        print(f"Generating 'highlow' labels with horizon {horizon}...")
        features = generate_labels_highlow(f_df, horizon=horizon)

        print(f"Finished generating 'highlow' labels. {len(features)} labels generated.")
    elif generator == "highlow2":
        print("Generating 'highlow2' labels...")
        f_df, features = generate_labels_highlow2(f_df, gen_config)
        print(f"Finished generating 'highlow2' labels. {len(features)} labels generated.")
    elif generator == "topbot":
        column_name = gen_config.get("columns", "close")

        # Fixed levels: bottom levels mirror the top levels with negative sign
        top_level_fracs = [0.01, 0.02, 0.03, 0.04, 0.05]
        bot_level_fracs = [-x for x in top_level_fracs]

        f_df, features = generate_labels_topbot(f_df, column_name, top_level_fracs, bot_level_fracs)
    elif generator == "topbot2":
        f_df, features = generate_labels_topbot2(f_df, gen_config)

    # Signals
    elif generator == "smoothen":
        f_df, features = generate_smoothen_scores(f_df, gen_config)
    elif generator == "combine":
        f_df, features = generate_combine_scores(f_df, gen_config)
    elif generator == "threshold_rule":
        f_df, features = generate_threshold_rule(f_df, gen_config)
    elif generator == "threshold_rule2":
        f_df, features = generate_threshold_rule2(f_df, gen_config)

    else:
        # Fail explicitly instead of returning None in place of the declared tuple
        raise ValueError(f"Unknown feature generator {generator}")

    #
    # Add generated features to the main data frame with all other columns and features
    #
    f_df = f_df[features]  # Keep only the derived columns
    fp = fs.get("feature_prefix")
    if fp:
        f_df = f_df.add_prefix(fp + "_")

    new_features = f_df.columns.to_list()

    df = df.join(f_df)  # Attach all derived features to the main frame

    return df, new_features
190 |
| - |
191 |
| - |
192 | 101 | if __name__ == '__main__':
|
193 | 102 | main()
|
0 commit comments