Skip to content

Commit 8c24ef0

Browse files
committed
refactor the structure of processing for online/stream part (batch scripts to be updated)
1 parent 8bd22a6 commit 8c24ef0

File tree

10 files changed

+432
-620
lines changed

10 files changed

+432
-620
lines changed

common/feature_generation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ def generate_features_talib(df, config: dict, last_rows: int = 0):
106106
Apply TA functions from talib according to the specified configuration parameters.
107107
108108
config = {
109-
"parameters": {"relative": True, "realtive_to_last": True, "percentage": True},
109+
"parameters": {"relative": True, "relative_to_last": True, "percentage": True},
110110
"columns": ["close"],
111111
"functions": ["SMA"],
112112
"windows": [2, 3], # If numbers, then to argument timeperiod. If dict, then

common/model_store.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,11 @@ def load_model_pair(model_path, score_column_name: str):
7070
return (model, scaler)
7171

7272

73-
def load_models(model_path, labels: list, train_features: list, algorithms: list):
74-
"""Load all model pairs for all combinations of the model parameters and return as a dict."""
73+
def load_models(model_path, labels: list, algorithms: list):
74+
"""Load all model pairs for all combinations of labels and algorithms and return as a dict."""
7575
models = {}
76-
for predicted_label in itertools.product(labels, train_features, algorithms):
77-
score_column_name = predicted_label[0] + label_algo_separator + predicted_label[1][0] + label_algo_separator + predicted_label[2]
76+
for label_algorithm in itertools.product(labels, algorithms):
77+
score_column_name = label_algorithm[0] + label_algo_separator + label_algorithm[1]["name"]
7878
model_pair = load_model_pair(model_path, score_column_name)
7979
models[score_column_name] = model_pair
8080
return models

common/signal_generation.py

Lines changed: 144 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os
22
from datetime import datetime, timezone, timedelta
3-
from typing import Union, List
3+
from typing import Union, List, Tuple
44
import json
55

66
import numpy as np
@@ -17,6 +17,54 @@
1717
"""
1818

1919

20+
def generate_smoothen_scores(df, config: dict):
21+
"""
22+
Smoothen several columns and rows. Used for smoothing scores.
23+
24+
The following operations are applied:
25+
- find average of the specified input columns (row-wise)
26+
- find moving average with the specified window
27+
- apply threshold to source buy/sell column(s) according to threshold parameter(s) by producing a boolean column
28+
29+
Notes:
30+
- Input point-wise scores in buy and sell columns are always positive
31+
"""
32+
33+
columns = config.get('columns')
34+
if not columns:
35+
raise ValueError(f"The 'columns' parameter must be a non-empty string. {type(columns)}")
36+
elif isinstance(columns, str):
37+
columns = [columns]
38+
39+
# TODO: check that all columns exist
40+
#if columns not in df.columns:
41+
# raise ValueError(f"{columns} do not exist in the input data. Existing columns: {df.columns.to_list()}")
42+
43+
# Average all buy and sell columns
44+
out_column = df[columns].mean(skipna=True, axis=1)
45+
46+
# Apply thresholds (if specified) and binarize the score
47+
point_threshold = config.get("point_threshold")
48+
if point_threshold:
49+
out_column = out_column >= point_threshold
50+
51+
# Moving average
52+
window = config.get("window")
53+
if isinstance(window, int):
54+
out_column = out_column.rolling(window, min_periods=window // 2).mean()
55+
elif isinstance(window, float):
56+
out_column = out_column.ewm(span=window, min_periods=window // 2, adjust=False).mean()
57+
58+
names = config.get('names')
59+
if not isinstance(names, str):
60+
raise ValueError(f"'names' parameter must be a non-empty string. {type(names)}")
61+
62+
df[names] = out_column
63+
64+
return df, [names]
65+
66+
67+
# TODO: DEPRECATED, REMOVE, REPLACE BY generator_smoothen_signals
2068
def aggregate_scores(df, model, score_column_out: str, score_columns: Union[List[str], str]):
2169
"""
2270
Add two signal numeric (buy and sell) columns by processing a list of buy and sell point-wise predictions.
@@ -62,6 +110,43 @@ def aggregate_scores(df, model, score_column_out: str, score_columns: Union[List
62110
return score_column
63111

64112

113+
def generate_combine_scores(df, config: dict):
114+
"""
115+
ML algorithms predict score which is always positive and typically within [0,1].
116+
One score for price growth and one score for price fall. This function combines pairs
117+
of such scores and produce one score within [-1,+1]. Positive values mean growth
118+
and negative values mean fall of price.
119+
"""
120+
columns = config.get('columns')
121+
if not columns:
122+
raise ValueError(f"The 'columns' parameter must be a non-empty string. {type(columns)}")
123+
elif not isinstance(columns, list) or len(columns) != 2:
124+
raise ValueError(f"'columns' parameter must be a list with buy column name and sell column name. {type(columns)}")
125+
126+
up_column = columns[0]
127+
down_column = columns[1]
128+
129+
out_column = config.get('names')
130+
131+
if config.get("combine") == "relative":
132+
combine_scores_relative(df, up_column, down_column, out_column)
133+
elif config.get("combine") == "difference":
134+
combine_scores_difference(df, up_column, down_column, out_column)
135+
else:
136+
# If buy score is greater than sell score then positive buy, otherwise negative sell
137+
df[out_column] = df[[up_column, down_column]].apply(lambda x: x[0] if x[0] >= x[1] else -x[1], raw=True, axis=1)
138+
139+
# Scale the score distribution to make it symmetric or normalize
140+
# Always apply the transformation to buy score. It might be in [0,1] or [-1,+1] depending on combine parameter
141+
if config.get("coefficient"):
142+
df[out_column] = df[out_column] * config.get("coefficient")
143+
if config.get("constant"):
144+
df[out_column] = df[out_column] + config.get("constant")
145+
146+
return df, [out_column]
147+
148+
149+
# TODO: DEPRECATED, REMOVE, REPLACE BY generator_smoothen_signals
65150
def combine_scores(df, model, buy_score_column, sell_score_column, trade_score_column):
66151
"""
67152
Mutually adjust two independent scores with opposite semantics.
@@ -159,6 +244,32 @@ def linear_regr_fn(X):
159244
# Signal rules
160245
#
161246

247+
def generate_threshold_rule(df, config):
248+
"""
249+
Apply rules based on thresholds and generate trade signal buy, sell or do nothing.
250+
251+
Returns signals in two pre-defined columns: 'buy_signal_column' and 'sell_signal_column'
252+
"""
253+
parameters = config.get("parameters", {})
254+
255+
columns = config.get("columns")
256+
if not columns:
257+
raise ValueError(f"The 'columns' parameter must be a non-empty string. {type(columns)}")
258+
elif isinstance(columns, list):
259+
columns = [columns]
260+
261+
buy_signal_column = config.get("names")[0]
262+
sell_signal_column = config.get("names")[1]
263+
264+
df[buy_signal_column] = \
265+
(df[columns] >= parameters.get("buy_signal_threshold"))
266+
df[sell_signal_column] = \
267+
(df[columns] <= parameters.get("sell_signal_threshold"))
268+
269+
return df, [buy_signal_column, sell_signal_column]
270+
271+
272+
# TODO: DEPRECATED TO BE REMOVED
162273
def apply_rule_with_score_thresholds(df, score_column_names, model):
163274
"""
164275
Apply rules based on thresholds and generate trade signal buy, sell or do nothing.
@@ -178,6 +289,38 @@ def apply_rule_with_score_thresholds(df, score_column_names, model):
178289
(df[score_column] <= parameters.get("sell_signal_threshold"))
179290

180291

292+
def generate_threshold_rule2(df, config):
293+
"""
294+
Assume using difference combination with negative sell scores
295+
"""
296+
parameters = config.get("parameters", {})
297+
298+
columns = config.get("columns")
299+
if not columns:
300+
raise ValueError(f"The 'columns' parameter must be a non-empty string. {type(columns)}")
301+
elif not isinstance(columns, list) or len(columns) != 2:
302+
raise ValueError(f"'columns' parameter must be a list with two column names. {type(columns)}")
303+
304+
score_column = columns[0]
305+
score_column_2 = columns[1]
306+
307+
buy_signal_column = config.get("names")[0]
308+
sell_signal_column = config.get("names")[1]
309+
310+
# Both buy scores are greater than the corresponding thresholds
311+
df[buy_signal_column] = \
312+
(df[score_column] >= parameters.get("buy_signal_threshold")) & \
313+
(df[score_column_2] >= parameters.get("buy_signal_threshold_2"))
314+
315+
# Both sell scores are smaller than the corresponding thresholds
316+
df[sell_signal_column] = \
317+
(df[score_column] <= parameters.get("sell_signal_threshold")) & \
318+
(df[score_column_2] <= parameters.get("sell_signal_threshold_2"))
319+
320+
return df, [buy_signal_column, sell_signal_column]
321+
322+
323+
# TODO: DEPRECATED TO BE REMOVED
181324
def apply_rule_with_score_thresholds_2(df, score_column_names, model):
182325
"""
183326
Assume using difference combination with negative sell scores

configs/config-sample-v0.7.dev.jsonc

Lines changed: 46 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,33 @@
5656
}
5757
],
5858

59-
"features_horizon": 100, // Minimum data length for computing features (in online mode). Take it from feature generator parameters
59+
"features_horizon": 100, // Online/stream: Minimum data length for computing features. Take it from feature generator parameters
60+
"features_last_rows": 5, // Online/stream: Last values which are really needed and have to be computed. All older values are not needed
6061

61-
// === AGGREGATION AND POST-PROCESSING ===
62+
// === SIGNALS ===
63+
"signal_sets": [
64+
{
65+
// Combine two unsigned scores into one signed score
66+
"generator": "combine", "config": {
67+
"columns": ["high_20_lc", "low_20_lc"], // 2 columns: with grow score and fall score
68+
"names": "trade_score", // Output column name: positive values - buy, negative values - sell
69+
"parameters": {
70+
"point_threshold": null, // Produce boolean column (optional)
71+
"window": 3, // Aggregate in time
72+
"combine": "difference", // "no_combine" (or empty), "relative", "difference"
73+
"coefficient": 1.0, "constant": 0.0 // Normalize
74+
}
75+
}},
76+
{
77+
// Decide whether to buy or sell depending on the score value
78+
"generator": "threshold_rule", "config": {
79+
"columns": "trade_score",
80+
"names": ["buy_signal_column", "sell_signal_column"], // Output boolean columns
81+
"parameters": {"buy_signal_threshold": 0.05, "sell_signal_threshold": -0.05}
82+
}}
83+
],
84+
85+
// === (DEPRECATED) AGGREGATION AND POST-PROCESSING ===
6286

6387
"score_aggregation_sets": [
6488
{
@@ -77,26 +101,13 @@
77101
}
78102
],
79103

80-
// === TRADE SIGNAL MODEL ===
81-
82-
"trade_model": {
83-
"rule_name": "",
84-
85-
"signal_columns": ["buy_signal_column", "sell_signal_column"],
86-
87-
"parameters": {
88-
"buy_signal_threshold": 0.05,
89-
"sell_signal_threshold": -0.05
90-
},
91-
92-
"simulate_trade": true // Trade simulation and notifications
93-
},
94-
95104
// === NOTIFICATIONS ===
96105

97106
"score_notification_model": { // When and what score notifications to send
98107
"score_notification": true,
99108

109+
"score_column_names": ["trade_score"],
110+
100111
"notify_band_up": true,
101112
"notify_band_dn": true,
102113
"positive_bands": [
@@ -110,9 +121,25 @@
110121
{"edge": -0.04, "frequency": 3, "sign": "", "text": "weak"},
111122
{"edge": -0.05, "frequency": 2, "sign": "〈〈", "bold": false, "text": "strong"},
112123
{"edge": -1.0, "frequency": 1, "sign": "〈〈〈📉", "bold": true, "text": "SELL ZONE"}
113-
],
124+
]
125+
},
126+
127+
"diagram_notification_model": {
128+
// Regularly sending historic data with prices, scores and buy-sell trade decisions
129+
"diagram_notification": true,
130+
131+
// # 1 previous week with hourly aggregation
132+
"freq": "H",
133+
"nrows": 168
134+
},
135+
136+
// === TRADE MODEL ===
137+
138+
"trade_model": {
139+
"simulate_trade": true, // Trade simulation with notifications
114140

115-
"notify_diagram": false
141+
"buy_signal_column":"buy_signal_column",
142+
"sell_signal_column": "sell_signal_column"
116143
},
117144

118145
// === FINDING BEST TRADE PARAMETERS ===

scripts/features.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,12 @@
77

88
from service.App import *
99
from common.feature_generation import *
10-
from common.label_generation_highlow import generate_labels_highlow
11-
from common.label_generation_highlow import generate_labels_highlow2
12-
from common.label_generation_topbot import generate_labels_topbot
13-
from common.label_generation_topbot import generate_labels_topbot2
10+
from common.label_generation_highlow import generate_labels_highlow, generate_labels_highlow2
11+
from common.label_generation_topbot import generate_labels_topbot, generate_labels_topbot2
12+
from common.signal_generation import (
13+
generate_smoothen_scores, generate_combine_scores,
14+
generate_threshold_rule, generate_threshold_rule2
15+
)
1416

1517
#
1618
# Parameters
@@ -157,6 +159,17 @@ def generate_feature_set(df: pd.DataFrame, fs: dict, last_rows: int) -> Tuple[pd
157159
f_df, features = generate_labels_topbot(f_df, column_name, top_level_fracs, bot_level_fracs)
158160
elif generator == "topbot2":
159161
f_df, features = generate_labels_topbot2(f_df, gen_config)
162+
163+
# Signals
164+
elif generator == "smoothen":
165+
f_df, features = generate_smoothen_scores(f_df, gen_config)
166+
elif generator == "combine":
167+
f_df, features = generate_combine_scores(f_df, gen_config)
168+
elif generator == "threshold_rule":
169+
f_df, features = generate_threshold_rule(f_df, gen_config)
170+
elif generator == "threshold_rule2":
171+
f_df, features = generate_threshold_rule2(f_df, gen_config)
172+
160173
else:
161174
print(f"Unknown feature generator {generator}")
162175
return

service/App.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class App:
3939
trade_state_status = 0 # Something wrong with our trading logic (wrong use, inconsistent state etc. what we cannot recover)
4040

4141
signal = None # Latest signal "BUY", "SELL"
42-
feature_df = None # Data from the latest analysis
42+
df = None # Data from the latest analysis
4343

4444
# Trade status
4545
transaction = None
@@ -140,10 +140,16 @@ class App:
140140
# Minimum history length required to compute derived features
141141
"features_horizon": 10,
142142

143-
# =======================================
144-
# === AGGREGATION AND POST-PROCESSING ===
143+
# ===============
144+
# === SIGNALS ===
145145

146-
"score_aggregation_sets": [],
146+
"signal_sets": [],
147+
148+
# =====================
149+
# === NOTIFICATIONS ===
150+
151+
"score_notification_model": {},
152+
"diagram_notification_model": {},
147153

148154
# ================================
149155
# === SIGNAL RULES FOR TRADING ===

0 commit comments

Comments
 (0)