Add docstrings for forecast_ar.py, task_fit_ar_model.py and task_forecast_ar.py.

Lenr4 · Lenr4 · commit 8cbc4b4bcfd7 · 2025-02-25T16:15:42.000+01:00
diff --git a/src/lennart_epp/analysis/forecast_ar.py b/src/lennart_epp/analysis/forecast_ar.py
@@ -4,6 +4,20 @@
 def forecast_ar_multi_step(
     df: pd.DataFrame, integrated_coefficients: pd.DataFrame, forecast_steps: int
 ) -> pd.Series:
+    """Generate multi-step forecasts using an AR model.
+
+    This function uses the integrated AR model coefficients to iteratively
+    forecast future values.
+
+    Args:
+        df (pd.DataFrame): The DataFrame containing the time series data.
+        integrated_coefficients (pd.DataFrame): DataFrame of integrated AR coefficients.
+        forecast_steps (int): Number of future steps to forecast.
+
+    Returns:
+        pd.Series: A time series of forecasted values indexed by dates.
+
+    """
     coeffs = integrated_coefficients["coefficient"].to_numpy()
     lag_order = len(coeffs) - 1
 
diff --git a/src/lennart_epp/analysis/task_fit_ar_model.py b/src/lennart_epp/analysis/task_fit_ar_model.py
@@ -14,12 +14,29 @@ def task_evaluate_ar_models(
     max_p=12,
     criterion="aic",
 ):
+    """Evaluate multiple AR models and store the results.
+
+    This function loads cleaned stock price data, fits AR models up to the specified
+    maximum lag order, and evaluates them using the given selection criterion.
+
+    Args:
+        script (Path): Path to the script that evaluates AR models.
+        data (Path): Path to the cleaned Apple stock data.
+        produces (tuple[Path, Path]): Paths to the output files:
+            - Pickle file containing evaluation results.
+            - LaTeX file with the top AR models.
+        max_p (int, optional): Maximum order of the AR model to evaluate.
+        criterion (str, optional): Model selection criterion ("aic" or "bic").
+
+    Returns:
+        None: Saves results to specified output files and asserts their existence.
+    """
     df = pd.read_pickle(data)
     evaluation_results = evaluate_ar_models(df, max_p=max_p, criterion=criterion)
 
     produces[0].parent.mkdir(parents=True, exist_ok=True)
     pd.to_pickle(evaluation_results, produces[0])
-    assert produces[0].exists(), f"❌ Failed to produce {produces[0]}"
+    assert produces[0].exists(), f" Failed to produce {produces[0]}"
 
     top_models_df = pd.DataFrame(evaluation_results.get("top_models", []))
 
@@ -53,4 +70,4 @@ def task_evaluate_ar_models(
             f.write("\\end{tabular}\n")
             f.write("\\end{table}\n")
 
-        assert produces[1].exists(), f"❌ Failed to produce {produces[1]}"
+        assert produces[1].exists(), f" Failed to produce {produces[1]}"
diff --git a/src/lennart_epp/analysis/task_forecast_ar.py b/src/lennart_epp/analysis/task_forecast_ar.py
@@ -5,18 +5,33 @@
 from lennart_epp.analysis.forecast_ar import forecast_ar_multi_step
 from lennart_epp.config import BLD
 
-# Konstanten für Fehlermeldungen
-missing_close_price_msg = "Spalte 'close_price' fehlt!"
-too_few_train_msg = "Zu wenige Trainingsdaten."
-type_forecast_msg = "Forecast muss 'pd.Series' sein."
-multi_forecast_msg = "Forecast enthält mehrdimensionale Werte."
+missing_close_price_msg = "Column 'close_price' missing!"
+too_few_train_msg = "Not enough training data."
+type_forecast_msg = "Forecast has to be 'pd.Series'."
+multi_forecast_msg = "Forecast contains multi-dimensional values."
 
 
 def task_forecast_ar(
     data=BLD / "data" / "cleaned_apple_data.pkl",
-    produces=BLD / "forecasts" / "apple_2023_forecast.pkl",
-    lags=30,
+    produces=BLD / "forecasts" / "multistep_forecast.pkl",
+    lags=50,
 ):
+    """Generate multi-step forecasts using an AR model.
+
+    Args:
+        data (Path): Path to the cleaned Apple stock data.
+        produces (Path): Path to store the multi-step forecast as a pickle file.
+        lags (int, optional): Number of lags to use for the AR model.
+
+    Raises:
+        KeyError: If the required "close_price" column is missing.
+        ValueError: If the training data has fewer observations than ``lags``.
+        TypeError: If the forecast is not a Pandas Series.
+        ValueError: If the forecast contains multi-dimensional values.
+
+    Returns:
+        None: Saves the forecast to the specified output file.
+    """
     df = pd.read_pickle(data)
     if "close_price" not in df.columns:
         raise KeyError(missing_close_price_msg)
@@ -26,18 +41,15 @@ def task_forecast_ar(
     if len(train_data) < lags:
         raise ValueError(too_few_train_msg)
 
-    # AR-Modell fitten, um die integrierten Koeffizienten zu erhalten
     ar_result = fit_ar_model(train_data.to_frame(), column="close_price", p=lags)
     integrated_coefficients = ar_result["integrated_coefficients"]
 
-    # Forecast mit den integrierten Koeffizienten berechnen
     forecast = forecast_ar_multi_step(
         df,
         integrated_coefficients=integrated_coefficients,
         forecast_steps=lags,
     )
 
-    # Validierung before dem Speichern
     if not isinstance(forecast, pd.Series):
         raise TypeError(type_forecast_msg)
     if forecast.apply(lambda x: isinstance(x, list | np.ndarray)).any():