Skip to content

Commit 8cbc4b4

Browse files
committed
Add docstrings for forecast_ar.py, task_fit_ar_model.py and task_forecast_ar.py.
1 parent f565636 commit 8cbc4b4

File tree

3 files changed

+55
-12
lines changed

3 files changed

+55
-12
lines changed

src/lennart_epp/analysis/forecast_ar.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,20 @@
44
def forecast_ar_multi_step(
55
df: pd.DataFrame, integrated_coefficients: pd.DataFrame, forecast_steps: int
66
) -> pd.Series:
7+
"""Generate multi-step forecasts using an AR model.
8+
9+
This function uses the integrated AR model coefficients to iteratively
10+
forecast future values.
11+
12+
Args:
13+
df (pd.DataFrame): The DataFrame containing the time series data.
14+
integrated_coefficients (pd.DataFrame): DataFrame of integrated AR coefficients.
15+
forecast_steps (int): Number of future steps to forecast.
16+
17+
Returns:
18+
pd.Series: A time series of forecasted values indexed by date.
19+
20+
"""
721
coeffs = integrated_coefficients["coefficient"].to_numpy()
822
lag_order = len(coeffs) - 1
923

src/lennart_epp/analysis/task_fit_ar_model.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,29 @@ def task_evaluate_ar_models(
1414
max_p=12,
1515
criterion="aic",
1616
):
17+
"""Evaluate multiple AR models and store the results.
18+
19+
This function loads cleaned stock price data, fits AR models up to the specified
20+
maximum lag order, and evaluates them using the given selection criterion.
21+
22+
Args:
23+
script (Path): Path to the script that evaluates AR models.
24+
data (Path): Path to the cleaned Apple stock data.
25+
produces (tuple[Path, Path]): Paths to the output files:
26+
- Pickle file containing evaluation results.
27+
- LaTeX file with the top AR models.
28+
max_p (int, optional): Maximum order of the AR model to evaluate.
29+
criterion (str, optional): Model selection criterion ("aic" or "bic").
30+
31+
Returns:
32+
None: Saves results to specified output files and asserts their existence.
33+
"""
1734
df = pd.read_pickle(data)
1835
evaluation_results = evaluate_ar_models(df, max_p=max_p, criterion=criterion)
1936

2037
produces[0].parent.mkdir(parents=True, exist_ok=True)
2138
pd.to_pickle(evaluation_results, produces[0])
22-
assert produces[0].exists(), f" Failed to produce {produces[0]}"
39+
assert produces[0].exists(), f" Failed to produce {produces[0]}"
2340

2441
top_models_df = pd.DataFrame(evaluation_results.get("top_models", []))
2542

@@ -53,4 +70,4 @@ def task_evaluate_ar_models(
5370
f.write("\\end{tabular}\n")
5471
f.write("\\end{table}\n")
5572

56-
assert produces[1].exists(), f" Failed to produce {produces[1]}"
73+
assert produces[1].exists(), f" Failed to produce {produces[1]}"

src/lennart_epp/analysis/task_forecast_ar.py

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,33 @@
55
from lennart_epp.analysis.forecast_ar import forecast_ar_multi_step
66
from lennart_epp.config import BLD
77

8-
# Konstanten für Fehlermeldungen
9-
missing_close_price_msg = "Spalte 'close_price' fehlt!"
10-
too_few_train_msg = "Zu wenige Trainingsdaten."
11-
type_forecast_msg = "Forecast muss 'pd.Series' sein."
12-
multi_forecast_msg = "Forecast enthält mehrdimensionale Werte."
8+
missing_close_price_msg = "Column 'close_price' missing!"
9+
too_few_train_msg = "Not enough training data."
10+
type_forecast_msg = "Forecast has to be 'pd.Series'."
11+
multi_forecast_msg = "Forecast contains multi-dimensional values."
1312

1413

1514
def task_forecast_ar(
1615
data=BLD / "data" / "cleaned_apple_data.pkl",
17-
produces=BLD / "forecasts" / "apple_2023_forecast.pkl",
18-
lags=30,
16+
produces=BLD / "forecasts" / "multistep_forecast.pkl",
17+
lags=50,
1918
):
19+
"""Generate multi-step forecasts using an AR model.
20+
21+
Args:
22+
data (Path): Path to the cleaned Apple stock data.
23+
produces (Path): Path to store the multi-step forecast as a pickle file.
24+
lags (int, optional): Number of lags to use for the AR model.
25+
26+
Raises:
27+
KeyError: If the required "close_price" column is missing.
28+
ValueError: If there are insufficient training data.
29+
TypeError: If the forecast is not a Pandas Series.
30+
ValueError: If the forecast contains multi-dimensional values.
31+
32+
Returns:
33+
None: Saves the forecast to the specified output file.
34+
"""
2035
df = pd.read_pickle(data)
2136
if "close_price" not in df.columns:
2237
raise KeyError(missing_close_price_msg)
@@ -26,18 +41,15 @@ def task_forecast_ar(
2641
if len(train_data) < lags:
2742
raise ValueError(too_few_train_msg)
2843

29-
# AR-Modell fitten, um die integrierten Koeffizienten zu erhalten
3044
ar_result = fit_ar_model(train_data.to_frame(), column="close_price", p=lags)
3145
integrated_coefficients = ar_result["integrated_coefficients"]
3246

33-
# Forecast mit den integrierten Koeffizienten berechnen
3447
forecast = forecast_ar_multi_step(
3548
df,
3649
integrated_coefficients=integrated_coefficients,
3750
forecast_steps=lags,
3851
)
3952

40-
# Validierung before dem Speichern
4153
if not isinstance(forecast, pd.Series):
4254
raise TypeError(type_forecast_msg)
4355
if forecast.apply(lambda x: isinstance(x, list | np.ndarray)).any():

0 commit comments

Comments
 (0)