|
| 1 | +import numpy as np |
| 2 | +import pandas as pd |
| 3 | +import pytest |
| 4 | +from statsmodels.tsa.ar_model import AutoReg |
| 5 | +from statsmodels.tsa.arima_process import ArmaProcess |
| 6 | + |
| 7 | +from lennart_epp.analysis.evaluate_ar_model import ( |
| 8 | + _calculate_aic, |
| 9 | + _calculate_bic, |
| 10 | + _compute_residuals, |
| 11 | + _predict_ar, |
| 12 | +) |
| 13 | + |
# Upper bound that the AIC/BIC "output" tests compare the computed criterion
# against (a coarse sanity check, not an exact expected value).
Max_Aic_Bic = 1e6
| 15 | + |
| 16 | + |
@pytest.fixture
def test_dataframe():
    """Return a 200-row frame holding one simulated autoregressive series.

    The series is sampled from an ``ArmaProcess`` built with the AR lag
    polynomial ``[1, -0.75, 0.25]`` and the MA polynomial ``[1]``. Noise is
    drawn from a NumPy generator seeded with 934, so every test receives the
    same ``close_price`` column.
    """
    generator = np.random.default_rng(934)
    process = ArmaProcess([1, -0.75, 0.25], [1])
    sample = process.generate_sample(nsample=200, distrvs=generator.standard_normal)
    return pd.DataFrame({"close_price": sample})
| 31 | + |
| 32 | + |
@pytest.fixture
def test_model_results(test_dataframe):
    """Fit a two-lag ``AutoReg`` on the fixture series and package the result.

    The returned dict holds a fixed ``p_value`` and ``differenced`` flag, the
    fitted parameters as an array under ``coefficients``, and an
    ``integrated_coefficients`` frame whose single ``coefficient`` column is
    the parameter vector with its first entry repeated at the front.
    """
    fitted = AutoReg(test_dataframe["close_price"], lags=2, old_names=False).fit()
    params = fitted.params

    # Prepend a copy of the first fitted parameter to the parameter vector.
    padded = np.insert(params.to_numpy(), 0, params.iloc[0])

    results = {
        "p_value": 0.05,
        "differenced": False,
        "coefficients": params.to_numpy(),
        "integrated_coefficients": pd.DataFrame({"coefficient": padded}),
    }
    return results
| 46 | + |
| 47 | + |
def test_compute_residuals(test_dataframe, test_model_results):
    """Check the length, type and NaN content of the computed residuals."""
    resid = _compute_residuals(test_dataframe, test_model_results)

    n_coefficients = len(test_model_results["integrated_coefficients"])
    expected_length = len(test_dataframe) - (n_coefficients - 1)

    assert len(resid) == expected_length
    assert isinstance(resid, pd.Series)
    # NOTE(review): this only fails when *every* residual is NaN; confirm
    # whether a stricter "no NaN at all" check is what is intended.
    assert not np.isnan(resid).all()
| 56 | + |
| 57 | + |
def test_calculate_aic_output(test_dataframe, test_model_results):
    """The AIC of the fixture residuals is a float below ``Max_Aic_Bic``."""
    resid = _compute_residuals(test_dataframe, test_model_results)

    criterion = _calculate_aic(resid, p=2)

    assert isinstance(criterion, float)
    assert criterion < Max_Aic_Bic
| 64 | + |
| 65 | + |
def test_calculate_aic_correctness(test_dataframe, test_model_results):
    """Compare ``_calculate_aic`` against ``2 * p + n * log(var)``.

    ``var`` is the sample variance (``ddof=1``) of the residuals and ``n`` is
    their count; the two values must agree within an absolute tolerance of 1.5.
    """
    lag_order = 2
    resid = _compute_residuals(test_dataframe, test_model_results)

    n_obs = len(resid)
    variance = np.var(resid, ddof=1)
    reference = n_obs * np.log(variance) + 2 * lag_order

    actual = _calculate_aic(resid, lag_order)

    assert np.isclose(actual, reference, atol=1.5), (
        f"AIC does not match: expected {reference}, got {actual}"
    )
| 81 | + |
| 82 | + |
def test_calculate_bic_output(test_dataframe, test_model_results):
    """The BIC of the fixture residuals is a float below ``Max_Aic_Bic``."""
    resid = _compute_residuals(test_dataframe, test_model_results)

    criterion = _calculate_bic(resid, p=2)

    assert isinstance(criterion, float)
    assert criterion < Max_Aic_Bic
| 89 | + |
| 90 | + |
def test_calculate_bic_correctness(test_dataframe, test_model_results):
    """Compare ``_calculate_bic`` against ``p * log(n) + n * log(var)``.

    ``var`` is the sample variance (``ddof=1``) of the residuals and ``n`` is
    their count; the two values must agree within an absolute tolerance of 5.0.
    """
    lag_order = 2
    resid = _compute_residuals(test_dataframe, test_model_results)

    n_obs = len(resid)
    variance = np.var(resid, ddof=1)
    reference = n_obs * np.log(variance) + lag_order * np.log(n_obs)

    actual = _calculate_bic(resid, lag_order)

    assert np.isclose(actual, reference, atol=5.0), (
        f"BIC does not match: expected {reference}, got {actual}"
    )
| 107 | + |
| 108 | + |
def test_predict_ar(test_dataframe, test_model_results):
    """Check the length, type and NaN content of the AR predictions."""
    forecast = _predict_ar(test_dataframe, test_model_results)

    n_coefficients = len(test_model_results["integrated_coefficients"])
    expected_length = len(test_dataframe) - (n_coefficients - 1)

    assert len(forecast) == expected_length
    assert isinstance(forecast, np.ndarray)
    # NOTE(review): this only fails when *every* prediction is NaN; confirm
    # whether a stricter "no NaN at all" check is what is intended.
    assert not np.isnan(forecast).all()
0 commit comments