Skip to content

Commit a6697fe

Browse files
committed
Update memory.py and task_memory.py to compute ACF and Hurst exponent for differenced close price, and update environment file for dependencies.
1 parent 87f8340 commit a6697fe

File tree

3 files changed

+168
-1
lines changed

3 files changed

+168
-1
lines changed

environment.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ dependencies:
2525
- pandas >=2.2
2626
- plotly >=5.2.0,<6
2727
- yfinance # added yfinance
28+
- scikit-learn >=1.0
2829

2930
# R template project dependencies
3031
- pytask-r >=0.4.1
@@ -35,4 +36,4 @@ dependencies:
3536
- r-forcats
3637

3738
# Install project
38-
- pip: [-e ., pdbp, kaleido, arch >=5.1]
39+
- pip: [-e ., pdbp, kaleido, arch >=5.1, nolds]

src/lennart_epp/analysis/memory.py

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
from pathlib import Path
2+
3+
import nolds
4+
import numpy as np
15
import pandas as pd
26

37
from lennart_epp.analysis.fit_ar_model import _check_stationarity, _difference_series
@@ -76,3 +80,116 @@ def write_stationarity_results(results: dict, file_path):
7680

7781
with file_path.open("w", encoding="utf-8") as f:
7882
f.write(latex_content.strip())
83+
84+
85+
def _compute_mean(series: np.ndarray) -> float:
    """Return the arithmetic mean of a time series.

    Args:
        series (np.ndarray): Observations of the time series.

    Returns:
        float: Average taken over all elements of ``series``.
    """
    average = np.mean(series)
    return average
95+
96+
97+
def _compute_variance(series: np.ndarray, mean_series: float) -> float:
    """Compute the un-normalised variance (sum of squared deviations) of a series.

    Note that the result is *not* divided by the number of observations, so it
    is ``n`` times the population variance. ``compute_acf`` divides the
    (equally un-normalised) output of ``_compute_autocovariance`` by this
    value, so the missing ``1 / n`` factors cancel in the ratio. Do not
    "fix" the normalisation here without changing both helpers together.

    Args:
        series (np.ndarray): The time series data as a NumPy array.
        mean_series (float): The precomputed mean of the series.

    Returns:
        float: Sum of squared deviations of ``series`` from ``mean_series``.
    """
    deviations = series - mean_series
    return np.sum(deviations**2)
108+
109+
110+
def _compute_autocovariance(series: np.ndarray, mean_series: float, lag: int) -> float:
    """Compute the un-normalised autocovariance of a series at a given lag.

    The value is the plain sum of products of mean-centred observations that
    are ``lag`` steps apart; it is *not* divided by the number of
    observations. At ``lag == 0`` it therefore equals the sum of squared
    deviations from ``mean_series``.

    Args:
        series (np.ndarray): The time series data as a NumPy array.
        mean_series (float): The precomputed mean of the series.
        lag (int): Non-negative lag at which to compute the autocovariance.

    Returns:
        float: Sum of lagged cross-products. ``0.0`` when ``lag`` is at least
            ``len(series)``, because no pair of observations overlaps.

    Raises:
        ValueError: If ``lag`` is negative. Without this check a negative lag
            would broadcast a short slice against the series and silently
            return a meaningless number.
    """
    if lag < 0:
        msg = f"lag must be non-negative, got {lag}"
        raise ValueError(msg)

    n = len(series)
    # Clamp so that lags beyond the series length yield two empty slices
    # instead of a negative stop index (which would select real data).
    overlap = max(n - lag, 0)
    leading = series[lag:] - mean_series
    trailing = series[:overlap] - mean_series
    return np.sum(leading * trailing)
123+
124+
125+
def compute_acf(
    df: pd.DataFrame, column: str = "close_price", lags: int = 1000
) -> dict:
    """Compute the autocorrelation function of the differenced series by hand.

    The frame is passed through ``_difference_series`` and the resulting
    ``diff_<column>`` column (with missing values dropped) is analysed. Each
    ACF value is the lagged sum of cross-products divided by the lag-0 sum of
    squared deviations.

    Args:
        df (pd.DataFrame): The dataframe containing the time series.
        column (str, optional): Column to analyze. Defaults to "close_price".
        lags (int, optional): Largest lag requested. It is capped at the
            number of differenced observations minus one.

    Returns:
        dict: ``"acf"`` holds the autocorrelations for lags ``0..max_lag`` and
            ``"lags"`` holds the matching lag indices, both as NumPy arrays.
    """
    differenced = _difference_series(df, column)
    values = differenced[f"diff_{column}"].dropna().to_numpy()

    # Never ask for more lags than the series can provide.
    max_lag = min(len(values) - 1, lags)
    center = _compute_mean(values)
    scale = _compute_variance(values, center)

    acf = np.array(
        [
            _compute_autocovariance(values, center, k) / scale
            for k in range(max_lag + 1)
        ]
    )
    return {"acf": acf, "lags": np.arange(len(acf))}
151+
152+
153+
def compute_hurst_exponent(df: pd.DataFrame, column: str = "close_price") -> dict:
    """Estimate the Hurst exponent of one dataframe column.

    Missing values are dropped from the column and the remaining observations
    are handed to ``nolds.hurst_rs``. The column is used as given; no
    differencing is applied here.

    Args:
        df (pd.DataFrame): The dataframe containing the time series.
        column (str, optional): Column to analyze. Defaults to "close_price".

    Returns:
        dict: Single-entry dictionary mapping ``"Hurst Exponent"`` to the
            estimate returned by ``nolds.hurst_rs``.
    """
    observations = df[column].dropna().to_numpy()
    return {"Hurst Exponent": nolds.hurst_rs(observations)}
167+
168+
169+
def write_hurst_result_to_tex(results: dict, file_path: Path):
    """Render the Hurst exponent as a small LaTeX table and save it.

    The parent directory of ``file_path`` is created if necessary before the
    value is read from ``results``.

    Args:
        results (dict): Dictionary containing the key ``"Hurst Exponent"``.
        file_path (Path): Path where the LaTeX file will be saved.
    """
    file_path.parent.mkdir(parents=True, exist_ok=True)

    exponent = results["Hurst Exponent"]

    # One entry per output line; the value is printed with four decimals.
    table_lines = [
        "\\begin{table}[H]",
        "\\centering",
        "\\caption{Hurst Exponent Analysis}",
        "\\label{tab:hurst_exponent}",
        "\\begin{tabular}{l c}",
        "\\toprule",
        "\\textbf{Metric} & \\textbf{Value} \\\\",
        "\\midrule",
        f"Hurst Exponent & {exponent:.4f} \\\\",
        "\\bottomrule",
        "\\end{tabular}",
        "\\end{table}",
    ]

    with file_path.open("w", encoding="utf-8") as handle:
        handle.write("\n".join(table_lines))

src/lennart_epp/analysis/task_memory.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
from lennart_epp.analysis.memory import (
44
check_stat_diff_close,
5+
compute_acf,
6+
compute_hurst_exponent,
7+
write_hurst_result_to_tex,
58
write_stationarity_results,
69
)
710
from lennart_epp.config import BLD
@@ -27,3 +30,49 @@ def task_check_stat_diff_close(
2730
write_stationarity_results(results, produces)
2831

2932
assert produces.exists(), f" Failed to produce {produces}"
33+
34+
35+
def task_compute_acf(
    data=BLD / "data" / "cleaned_apple_data.pkl",
    produces=BLD / "memory" / "acf.pkl",
):
    """Compute the ACF of the differenced close price and pickle the result.

    Args:
        data (Path): Path to the cleaned Apple stock data (Pickle file).
        produces (Path): Path to output pickle file where the ACF results are stored.

    Returns:
        None: Saves ACF values to a .pkl file.
    """
    acf_results = compute_acf(pd.read_pickle(data), column="close_price")

    # Make sure the target folder exists before writing the pickle.
    output_dir = produces.parent
    output_dir.mkdir(parents=True, exist_ok=True)
    pd.to_pickle(acf_results, produces)

    assert produces.exists(), f"Failed to produce {produces}"
56+
57+
58+
def task_hurst_exponent(
    data=BLD / "data" / "cleaned_apple_data.pkl",
    produces=BLD / "memory" / "hurst_exponent.tex",
):
    """Task to compute the Hurst exponent and store results as LaTeX file.

    The exponent is computed on the first difference of ``close_price``.

    Args:
        data (Path): Path to the cleaned Apple stock data.
        produces (Path): Path to the output LaTeX file.

    Returns:
        None: Saves results as a LaTeX file.
    """
    df = pd.read_pickle(data)
    # No ``.dropna()`` here: assigning a Series to a column realigns it to the
    # frame's index, which would put the leading NaN straight back (and forces
    # a reindex that fails on duplicate index labels). The NaN is removed
    # inside ``compute_hurst_exponent`` before the estimate is computed.
    df["diff_close_price"] = df["close_price"].diff()

    results = compute_hurst_exponent(df, column="diff_close_price")

    write_hurst_result_to_tex(results, produces)

    assert produces.exists(), f"Failed to produce {produces}"

0 commit comments

Comments
 (0)