Skip to content

Commit f672bec

Browse files
committed
Add test file for memory.py and add remaining docstrings in task_plot_ar.py.
1 parent 663f520 commit f672bec

File tree

4 files changed

+158
-8
lines changed

4 files changed

+158
-8
lines changed

pytest.ini

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
[pytest]
2+
filterwarnings =
3+
ignore::DeprecationWarning
4+
ignore:pkg_resources is deprecated:DeprecationWarning
5+
ignore::DeprecationWarning:nolds.datasets

src/lennart_epp/analysis/task_forecast_ar.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,3 @@ def task_forecast_ar(
5757

5858
produces.parent.mkdir(parents=True, exist_ok=True)
5959
pd.to_pickle(forecast, produces)
60-
61-
html_output = produces.with_suffix(".html")
62-
forecast.to_frame(name="Forecasted Close Price").to_html(html_output, index=True)

src/lennart_epp/final/task_plot_ar.py

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,28 @@
44
from lennart_epp.final.plot_ar import plot_top_ar_models
55
from lennart_epp.final.plot_forecast_ar import plot_forecast_ar
66

7-
no_top_models_msg = "Keine Top-Modelle in der Auswertung gefunden."
8-
file_creation_msg = "Datei konnte nicht erstellt werden."
7+
no_top_models_msg = "Top-models not found."
8+
file_creation_msg = "File could not be created."
99

1010

1111
def task_plot_top_ar_models(
1212
script=SRC / "final" / "plot_ar.py",
1313
evaluation_data=BLD / "models" / "ar_model_evaluation.pkl",
1414
produces=BLD / "plots" / "top_ar_models_plot.html",
1515
):
16+
"""Generate and save a plot of the top-performing AR models.
17+
18+
Args:
19+
script (Path): Path to the script responsible for plotting.
20+
evaluation_data (Path): Path to the AR model evaluation data.
21+
produces (Path): Path where the output plot (HTML) will be stored.
22+
23+
Returns:
24+
None: Ensures the top AR models are visualized and stored.
25+
26+
Raises:
27+
ValueError: If no top models are available in the evaluation results.
28+
"""
1629
evaluation = pd.read_pickle(evaluation_data)
1730

1831
top_models = pd.DataFrame(evaluation.get("top_models", []))
@@ -24,7 +37,7 @@ def task_plot_top_ar_models(
2437
top_models=top_models,
2538
df=pd.read_pickle(BLD / "data" / "cleaned_apple_data.pkl"),
2639
plot_path=str(produces),
27-
export_as_pdf=True, # Änderung: jetzt wird auch ein PDF erzeugt
40+
export_as_pdf=True,
2841
)
2942

3043
produces.parent.mkdir(parents=True, exist_ok=True)
@@ -33,9 +46,22 @@ def task_plot_top_ar_models(
3346

3447
def task_plot_forecast_ar(
3548
data=BLD / "data" / "cleaned_apple_data.pkl",
36-
forecast=BLD / "forecasts" / "apple_2023_forecast.pkl",
37-
produces=BLD / "plots" / "apple_forecast_2023.html",
49+
forecast=BLD / "forecasts" / "multistep_forecast.pkl",
50+
produces=BLD / "plots" / "multistep_forecast.html",
3851
):
52+
"""Generate and save a visualization of the AR model multi-step forecast.
53+
54+
Args:
55+
data (Path): Path to the cleaned stock price data.
56+
forecast (Path): Path to the multi-step forecast data.
57+
produces (Path): Path to store the generated forecast plot.
58+
59+
Returns:
60+
None: Ensures the forecast visualization is generated and saved.
61+
62+
Raises:
63+
AssertionError: If the output plot file is not created successfully.
64+
"""
3965
plot_forecast_ar(
4066
data_path=data,
4167
forecast_path=forecast,

tests/analysis/test_memory.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import numpy as np
2+
import pandas as pd
3+
import pytest
4+
5+
from lennart_epp.analysis.memory import (
6+
_compute_autocovariance,
7+
_compute_mean,
8+
_compute_variance,
9+
check_stat_diff_close,
10+
compute_acf,
11+
compute_hurst_exponent,
12+
)
13+
14+
15+
@pytest.fixture
def test_series():
    """Provide a reproducible standard-normal series of length 100."""
    generator = np.random.default_rng(2837)  # fixed seed keeps tests deterministic
    return generator.standard_normal(100)
20+
21+
22+
@pytest.fixture
def test_df(test_series):
    """Wrap the random test series in a single-column DataFrame."""
    frame = pd.DataFrame({"close_price": test_series})
    return frame
26+
27+
28+
@pytest.fixture
def precomputed_values(test_series):
    """Return the series mean and its sum of squared deviations.

    Note: the second value is the *unnormalized* variance (no division by n),
    which is the quantity the variance test compares against.
    """
    centre = np.mean(test_series)
    squared_deviations = (test_series - centre) ** 2
    return centre, np.sum(squared_deviations)
34+
35+
36+
def test_compute_mean(test_series):
    """_compute_mean should agree with numpy's mean."""
    expected = np.mean(test_series)
    assert _compute_mean(test_series) == pytest.approx(expected, rel=1e-6)
39+
40+
41+
def test_compute_variance(test_series, precomputed_values):
    """_compute_variance should reproduce the precomputed sum of squared deviations."""
    mean_series, expected_variance = precomputed_values
    actual = _compute_variance(test_series, mean_series)
    assert actual == pytest.approx(expected_variance, rel=1e-6)
47+
48+
49+
def test_compute_autocovariance_lag_1(test_series, precomputed_values):
    """_compute_autocovariance should match a direct lag-1 computation."""
    mean_series, _ = precomputed_values
    lag = 1
    n = len(test_series)
    centred_head = test_series[: n - lag] - mean_series
    centred_tail = test_series[lag:] - mean_series
    expected = np.sum(centred_tail * centred_head)
    actual = _compute_autocovariance(test_series, mean_series, lag)
    assert actual == pytest.approx(expected, rel=1e-6)
60+
61+
62+
def test_compute_autocovariance_lag_5(test_series, precomputed_values):
    """Test whether _compute_autocovariance returns the correct value for lag=5.

    NOTE(review): the original body set ``lag = 10`` (and the docstring said
    lag=10) even though the test is named ``..._lag_5``; the lag is now 5 so
    the name, docstring, and computation agree.
    """
    mean_series, _ = precomputed_values
    lag = 5
    expected_autocov = np.sum(
        (test_series[lag:] - mean_series)
        * (test_series[: len(test_series) - lag] - mean_series)
    )
    assert _compute_autocovariance(test_series, mean_series, lag) == pytest.approx(
        expected_autocov, rel=1e-6
    )
73+
74+
75+
def test_compute_acf_output_structure(test_df):
    """compute_acf must expose both an 'acf' and a 'lags' entry."""
    result = compute_acf(test_df, column="close_price", lags=10)
    assert "acf" in result
    assert "lags" in result
79+
80+
81+
# 10 requested lags plus (presumably) lag zero — 11 entries expected.
expected_length = 11


def test_compute_acf_length(test_df):
    """compute_acf must return 'acf' and 'lags' arrays of the expected length."""
    result = compute_acf(test_df, column="close_price", lags=10)
    for key in ("acf", "lags"):
        assert len(result[key]) == expected_length
89+
90+
91+
def test_compute_hurst_exponent_output(test_df):
    """compute_hurst_exponent must return a dict keyed by 'Hurst Exponent'."""
    outcome = compute_hurst_exponent(test_df, column="close_price")
    assert isinstance(outcome, dict)
    assert "Hurst Exponent" in outcome
96+
97+
98+
def test_compute_hurst_exponent_range(test_df):
    """The Hurst exponent of the random series must lie in [0, 1]."""
    outcome = compute_hurst_exponent(test_df, column="close_price")
    hurst = outcome["Hurst Exponent"]
    assert 0 <= hurst <= 1
104+
105+
106+
def test_check_stat_diff_close_returns_dict(test_df):
    """check_stat_diff_close must hand back a dictionary."""
    outcome = check_stat_diff_close(test_df, column="close_price")
    assert isinstance(outcome, dict)
110+
111+
112+
def test_check_stat_diff_close_has_expected_keys(test_df):
    """The result must contain exactly the ADF statistic, p-value, and stationarity flag."""
    outcome = check_stat_diff_close(test_df, column="close_price")
    assert set(outcome.keys()) == {"ADF Test Statistic", "P-Value", "Is Stationary"}
117+
118+
119+
def test_check_stat_diff_close_raises_error_for_missing_column(test_df):
    """A missing column must raise ValueError with the standard message."""
    missing = "non_existent_column"
    with pytest.raises(ValueError, match="Column .* not found in dataframe."):
        check_stat_diff_close(test_df, column=missing)

0 commit comments

Comments
 (0)