66
def _check_stationarity(
    df: pd.DataFrame, column: str, significance: float = 0.05
) -> tuple[bool, float, float]:
    """Run an Augmented Dickey-Fuller (ADF) test on one column of a DataFrame.

    Missing values are dropped from the column before it is handed to the test.

    Args:
        df (pd.DataFrame): Frame holding the time series.
        column (str): Name of the column to test.
        significance (float, optional): Threshold the p-value is compared
            against. Defaults to 0.05.

    Returns:
        tuple:
            - bool: True when the p-value is strictly below ``significance``,
              i.e. the series is treated as stationary.
            - float: The p-value reported by the ADF test.
            - float: The ADF test statistic.
    """
    result = ADF(df[column].dropna())
    pvalue = result.pvalue
    statistic = result.stat

    is_stationary = pvalue < significance
    return is_stationary, pvalue, statistic
1428
1529
1630def _difference_series (df : pd .DataFrame , column : str ) -> pd .DataFrame :
17- df [f"diff_{ column } " ] = df [column ].diff ().dropna ()
18- return df
31+ """Apply first differencing to a column in a DataFrame.
32+
33+ Args:
34+ df (pd.DataFrame): The dataframe containing the time series.
35+ column (str): The column to be differenced.
36+
37+ Returns:
38+ pd.DataFrame: A new DataFrame with the differenced column.
39+ """
40+ df_copy = df .copy ()
41+ df_copy [f"diff_{ column } " ] = df_copy [column ].diff ().dropna ()
42+ return df_copy [[f"diff_{ column } " ]]
1943
2044
2145def _create_lagged_features (df : pd .DataFrame , column : str , p : int ) -> pd .DataFrame :
46+ """Generate lagged features for an autoregressive model.
47+
48+ Args:
49+ df (pd.DataFrame): The input DataFrame containing the time series data.
50+ column (str): The column for which lagged features should be created.
51+ p (int): The number of lagged periods to generate.
52+
53+ Returns:
54+ pd.DataFrame: A DataFrame with the original column and its lagged features.
55+ """
2256 for lag in range (1 , p + 1 ):
2357 df [f"{ column } _lag{ lag } " ] = df [column ].shift (lag )
2458
@@ -28,6 +62,16 @@ def _create_lagged_features(df: pd.DataFrame, column: str, p: int) -> pd.DataFra
2862
2963
3064def _ar_model (df : pd .DataFrame , column : str , p : int ) -> np .ndarray :
65+ """Estimate autoregressive (AR) model parameters using the least squares method.
66+
67+ Args:
68+ df (pd.DataFrame): The DataFrame with time series data and lagged features.
69+ column (str): The target column for the autoregressive model.
70+ p (int): The order (number of lags) of the AR model.
71+
72+ Returns:
73+ np.ndarray: An array of estimated coefficients, including the intercept term.
74+ """
3175 x = df [[f"{ column } _lag{ i } " for i in range (1 , p + 1 )]].to_numpy ()
3276 y = df [column ].to_numpy ()
3377
@@ -41,23 +85,28 @@ def _ar_model(df: pd.DataFrame, column: str, p: int) -> np.ndarray:
4185def _integrate_ar_coefficients (
4286 diff_coefficients : np .ndarray , * , differenced : bool
4387) -> pd .DataFrame :
88+ """Convert differenced AR model coefficients to integrated form.
89+
90+ Args:
91+ diff_coefficients (np.ndarray): The coefficients from the differenced AR model.
92+ differenced (bool): Whether the model was fitted on differenced data.
93+
94+ Returns:
95+ pd.DataFrame: A DataFrame with integrated coefficients and corresponding lags.
96+ """
4497 if not differenced :
4598 integrated_coeff = diff_coefficients
4699 else :
47- integrated_coeff = np .zeros (len (diff_coefficients ) + 1 ) # Platz für AR(p+1)
48- integrated_coeff [0 ] = diff_coefficients [0 ] # Intercept bleibt gleich
100+ integrated_coeff = np .zeros (len (diff_coefficients ) + 1 )
101+ integrated_coeff [0 ] = diff_coefficients [0 ]
49102
50- # Erster AR-Koeffizient (vom differenzierten Model)
51103 integrated_coeff [1 ] = 1 + diff_coefficients [1 ]
52104
53- # Nachfolgende AR-Koeffizienten
54105 for i in range (2 , len (diff_coefficients )):
55106 integrated_coeff [i ] = diff_coefficients [i - 1 ] - diff_coefficients [i ]
56107
57- # Zusätzliches Lag (durch Integration)
58108 integrated_coeff [- 1 ] = - diff_coefficients [- 1 ]
59109
60- # DataFrame für besseren Überblick
61110 integrated_coeff_df = pd .DataFrame (
62111 {
63112 "coefficient" : integrated_coeff ,
@@ -71,9 +120,24 @@ def _integrate_ar_coefficients(
71120 return integrated_coeff_df
72121
73122
74- def fit_ar_model (df : pd .DataFrame , column : str = "close_price" , p : int = 3 ) -> dict :
75- """Fitte ein AR(p)-Modell und speichere differenzierte & originale Koeffizienten."""
76- is_stationary , p_value = _check_stationarity (df , column )
123+ def fit_ar_model (df : pd .DataFrame , column : str = "close_price" , p : int = 1 ) -> dict :
124+ """Fit an autoregressive (AR) model of order p.
125+
126+ Args:
127+ df (pd.DataFrame): The DataFrame containing the time series data.
128+ column (str, optional): The target column to model. Defaults to "close_price".
129+ p (int, optional): The order of the AR model. Defaults to 1.
130+
131+ Returns:
132+ dict: A dictionary containing:
133+ - "coefficients" (np.ndarray): Estimated coefficients of the AR(p) model.
134+ - "integrated_coefficients" (pd.DataFrame): Integrated coefficients.
135+ - "lag_order" (int): The order of the AR model (p).
136+ - "p_value" (float): The p-value from the stationarity test.
137+ - "differenced" (bool): Whether the series was differenced before fitting.
138+
139+ """
140+ is_stationary , p_value , test_statistic_adf = _check_stationarity (df , column )
77141 differenced = False
78142
79143 if not is_stationary :
0 commit comments