Skip to content

Commit 1d3a3cf

Browse files
committed
Complete and modify test_evaluate_ar_model.py and test_fit_ar_model.py so that all functions are tested and the number of assert statements per test function is reduced.
1 parent 1e5f2d2 commit 1d3a3cf

File tree

2 files changed

+201
-37
lines changed

2 files changed

+201
-37
lines changed

tests/analysis/test_evaluate_ar_model.py

Lines changed: 77 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
_calculate_bic,
1010
_compute_residuals,
1111
_predict_ar,
12+
evaluate_ar_models,
1213
)
1314

1415
Max_Aic_Bic = 1e6
@@ -46,59 +47,58 @@ def test_model_results(test_dataframe):
4647

4748

4849
def test_compute_residuals(test_dataframe, test_model_results):
50+
"""Test that _compute_residuals returns a valid non-NaN pandas Series."""
4951
residuals = _compute_residuals(test_dataframe, test_model_results)
5052

51-
assert len(residuals) == len(test_dataframe) - (
52-
len(test_model_results["integrated_coefficients"]) - 1
53+
assert all(
54+
[
55+
isinstance(residuals, pd.Series),
56+
len(residuals)
57+
== len(test_dataframe)
58+
- (len(test_model_results["integrated_coefficients"]) - 1),
59+
not np.isnan(residuals).all(),
60+
]
5361
)
54-
assert isinstance(residuals, pd.Series)
55-
assert not np.isnan(residuals).all()
5662

5763

5864
def test_calculate_aic_output(test_dataframe, test_model_results):
65+
"""Test that _calculate_aic returns a valid float within a reasonable range."""
5966
residuals = _compute_residuals(test_dataframe, test_model_results)
6067
aic = _calculate_aic(residuals, p=2)
6168

62-
assert isinstance(aic, float)
63-
assert aic < Max_Aic_Bic
69+
assert all([isinstance(aic, float), aic < Max_Aic_Bic])
6470

6571

6672
def test_calculate_aic_correctness(test_dataframe, test_model_results):
73+
"""Test that _calculate_aic computes a nearly equal value to the formula."""
6774
residuals = _compute_residuals(test_dataframe, test_model_results)
6875

6976
n = len(residuals)
70-
7177
p = 2
72-
7378
sigma_squared = np.var(residuals, ddof=1)
74-
7579
expected_aic = 2 * p + n * np.log(sigma_squared)
76-
7780
computed_aic = _calculate_aic(residuals, p)
7881
assert np.isclose(computed_aic, expected_aic, atol=1.5), (
7982
f"AIC does not match: expected {expected_aic}, got {computed_aic}"
8083
)
8184

8285

8386
def test_calculate_bic_output(test_dataframe, test_model_results):
87+
"""Test that _calculate_bic returns a valid float within a reasonable range."""
8488
residuals = _compute_residuals(test_dataframe, test_model_results)
8589
bic = _calculate_bic(residuals, p=2)
8690

87-
assert isinstance(bic, float)
88-
assert bic < Max_Aic_Bic
91+
assert all([isinstance(bic, float), bic < Max_Aic_Bic])
8992

9093

9194
def test_calculate_bic_correctness(test_dataframe, test_model_results):
95+
"""Test that _calculate_bic computes a nearly equal value to the formula."""
9296
residuals = _compute_residuals(test_dataframe, test_model_results)
9397

9498
n = len(residuals)
95-
9699
p = 2
97-
98100
sigma_squared = np.var(residuals, ddof=1)
99-
100101
expected_bic = p * np.log(n) + n * np.log(sigma_squared)
101-
102102
computed_bic = _calculate_bic(residuals, p)
103103

104104
assert np.isclose(computed_bic, expected_bic, atol=5.0), (
@@ -107,10 +107,67 @@ def test_calculate_bic_correctness(test_dataframe, test_model_results):
107107

108108

109109
def test_predict_ar(test_dataframe, test_model_results):
110+
"""Test that _predict_ar produces valid non-NaN numpy predictions."""
110111
predictions = _predict_ar(test_dataframe, test_model_results)
111112

112-
assert len(predictions) == len(test_dataframe) - (
113-
len(test_model_results["integrated_coefficients"]) - 1
113+
assert all(
114+
[
115+
len(predictions)
116+
== len(test_dataframe)
117+
- (len(test_model_results["integrated_coefficients"]) - 1),
118+
isinstance(predictions, np.ndarray),
119+
not np.isnan(predictions).all(),
120+
]
114121
)
115-
assert isinstance(predictions, np.ndarray)
116-
assert not np.isnan(predictions).all()
122+
123+
124+
def test_evaluate_ar_models_output_structure(test_dataframe):
125+
"""Test that evaluate_ar_models returns a dictionary with expected keys."""
126+
results = evaluate_ar_models(test_dataframe, max_p=5, criterion="aic")
127+
assert set(results.keys()) == {"top_models", "model_metrics", "metadata"}
128+
129+
130+
def test_evaluate_ar_models_top_models_length(test_dataframe):
131+
"""Test that 'top_models' contains up to 3 models."""
132+
count_top_models = 3
133+
results = evaluate_ar_models(test_dataframe, max_p=5, criterion="aic")
134+
assert len(results["top_models"]) <= count_top_models
135+
136+
137+
def test_evaluate_ar_models_model_metrics_length(test_dataframe):
138+
"""Test that 'model_metrics' contains max_p models."""
139+
max_p = 5
140+
results = evaluate_ar_models(test_dataframe, max_p=max_p, criterion="aic")
141+
assert len(results["model_metrics"]) == max_p
142+
143+
144+
def test_evaluate_ar_models_metadata(test_dataframe):
145+
"""Test that 'metadata' contains correct max_p and criterion values."""
146+
max_p = 5
147+
criterion = "aic"
148+
results = evaluate_ar_models(test_dataframe, max_p=max_p, criterion=criterion)
149+
assert results["metadata"] == {"max_p": max_p, "criterion": criterion}
150+
151+
152+
def test_evaluate_ar_models_top_models_sorted(test_dataframe):
153+
"""Test that 'top_models' are sorted by the chosen criterion."""
154+
results = evaluate_ar_models(test_dataframe, max_p=5, criterion="aic")
155+
sorted_aic_values = [model["aic"] for model in results["top_models"]]
156+
assert sorted_aic_values == sorted(sorted_aic_values)
157+
158+
159+
def test_evaluate_ar_models_model_keys(test_dataframe):
160+
"""Test that each model result contains expected keys."""
161+
results = evaluate_ar_models(test_dataframe, max_p=5, criterion="aic")
162+
required_keys = {"p", "aic", "bic", "p_value", "differenced", "coefficients"}
163+
164+
for model in results["model_metrics"]:
165+
assert set(model.keys()).issuperset(required_keys)
166+
167+
168+
def test_evaluate_ar_models_coefficients_type(test_dataframe):
169+
"""Test that 'coefficients' in model results are numpy arrays."""
170+
results = evaluate_ar_models(test_dataframe, max_p=5, criterion="aic")
171+
172+
for model in results["model_metrics"]:
173+
assert isinstance(model["coefficients"], np.ndarray)

tests/analysis/test_fit_ar_model.py

Lines changed: 124 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -7,43 +7,71 @@
77
_check_stationarity,
88
_create_lagged_features,
99
_difference_series,
10+
_integrate_ar_coefficients,
11+
fit_ar_model,
1012
)
1113

12-
significane = 0.05
14+
significance = 0.05
1315
expected_coeff_count = 3 # For p=2: Intercept + 2 Lags
1416
expected_length_lagged = 3
1517

1618

17-
def test_check_stationarity():
19+
def test_check_stationarity_stationary():
20+
"""Test ADF stationarity check on a stationary time series."""
1821
rng = np.random.default_rng(12)
1922
df = pd.DataFrame({"price": rng.normal(0, 1, 100)})
20-
stationary, p_value = _check_stationarity(df, "price", significance=significane)
21-
assert bool(stationary) is True
2223

23-
# 2. Non-stationary series
24-
df["price"] = np.linspace(1, 100, 100) + rng.normal(0, 0.5, 100)
25-
stationary, p_value = _check_stationarity(df, "price", significance=significane)
26-
assert bool(stationary) is False
27-
assert p_value > significane
24+
stationary, p_value, test_statistic = _check_stationarity(df, "price", significance)
25+
26+
assert all(
27+
[
28+
bool(stationary) is True,
29+
isinstance(p_value, float),
30+
isinstance(test_statistic, float),
31+
]
32+
)
33+
34+
35+
def test_check_stationarity_non_stationary():
36+
"""Test ADF stationarity check on a nonstationary time series."""
37+
rng = np.random.default_rng(12)
38+
df = pd.DataFrame({"price": np.linspace(1, 100, 100) + rng.normal(0, 0.5, 100)})
39+
40+
stationary, p_value, test_statistic = _check_stationarity(df, "price", significance)
41+
42+
assert all(
43+
[
44+
bool(stationary) is False,
45+
p_value > significance,
46+
isinstance(test_statistic, float),
47+
]
48+
)
2849

2950

3051
def test_difference_series():
52+
"""Test differencing function for correct column creation and values."""
3153
df = pd.DataFrame({"price": [100, 101, 103, 106]})
3254
result = _difference_series(df.copy(), "price")
33-
assert "diff_price" in result.columns
34-
# Check that the second value of the differenced series equals 1.
35-
assert result["diff_price"].iloc[1] == 1
55+
56+
assert all(["diff_price" in result.columns, result["diff_price"].iloc[1] == 1])
3657

3758

3859
def test_create_lagged_features():
60+
"""Test creation of lagged features for an AR model."""
3961
df = pd.DataFrame({"price": [10, 20, 30, 40, 50]})
4062
result = _create_lagged_features(df.copy(), "price", 2)
41-
assert "price_lag1" in result.columns
42-
assert "price_lag2" in result.columns
43-
assert len(result) == expected_length_lagged
63+
64+
assert all(
65+
[
66+
"price_lag1" in result.columns,
67+
"price_lag2" in result.columns,
68+
len(result) == expected_length_lagged,
69+
]
70+
)
4471

4572

4673
def test_ar_model_output():
74+
"""Test output structure and type of AR model fitting function."""
4775
rng = np.random.default_rng(312)
4876

4977
dates = pd.date_range("2020-01-01", periods=10, freq="D")
@@ -62,11 +90,11 @@ def test_ar_model_output():
6290

6391
coeffs = _ar_model(df, "price", 2)
6492

65-
assert len(coeffs) == expected_coeff_count
66-
assert isinstance(coeffs, np.ndarray)
93+
assert all([len(coeffs) == expected_coeff_count, isinstance(coeffs, np.ndarray)])
6794

6895

6996
def test_ar_model_correctness():
97+
"""Test correctness of AR model coefficients against a reference implementation."""
7098
rng = np.random.default_rng(2)
7199
dates = pd.date_range("2020-01-01", periods=100, freq="D")
72100
df = pd.DataFrame(
@@ -89,3 +117,82 @@ def test_ar_model_correctness():
89117
f"Reference: {reference_coeffs}\n"
90118
f"Difference: {custom_coeffs - reference_coeffs}"
91119
)
120+
121+
122+
def test_integrate_ar_coefficients_no_differencing():
123+
"""Test integration of AR coefficients when no differencing is applied."""
124+
diff_coeffs = np.array([0.5, -0.2, 0.1])
125+
126+
result = _integrate_ar_coefficients(diff_coeffs, differenced=False)
127+
expected_lags = ["Intercept", "Lag 1", "Lag 2"]
128+
assert all(
129+
[
130+
np.allclose(result["coefficient"].to_numpy(), diff_coeffs),
131+
list(result["lag"]) == expected_lags,
132+
]
133+
)
134+
135+
136+
def test_integrate_ar_coefficients_with_differencing():
137+
"""Test integration of AR coefficients when differencing is applied."""
138+
diff_coeffs = np.array([0.5, -0.2, 0.1])
139+
140+
result = _integrate_ar_coefficients(diff_coeffs, differenced=True)
141+
142+
expected_coeffs = np.array([0.5, 1 - 0.2, -0.2 - 0.1, -0.1])
143+
expected_lags = ["Intercept", "Lag 1", "Lag 2", "Lag 3"]
144+
assert all(
145+
[
146+
np.allclose(result["coefficient"].to_numpy(), expected_coeffs),
147+
list(result["lag"]) == expected_lags,
148+
]
149+
)
150+
151+
152+
def test_fit_ar_model_stationary_series():
153+
"""Test AR model fitting on a stationary time series."""
154+
rng = np.random.default_rng(42)
155+
df = pd.DataFrame(
156+
{"close_price": np.sin(np.linspace(0, 10, 100)) + rng.normal(0, 0.1, 100)}
157+
)
158+
159+
result = fit_ar_model(df, column="close_price", p=2)
160+
161+
assert result["differenced"] is False
162+
163+
164+
def test_fit_ar_model_non_stationary_series():
165+
"""Test AR model fitting on a non-stationary time series."""
166+
rng = np.random.default_rng(42)
167+
df = pd.DataFrame(
168+
{"close_price": np.cumsum(np.linspace(0.1, 1, 100)) + rng.normal(0, 0.1, 100)}
169+
)
170+
171+
result = fit_ar_model(df, column="close_price", p=2)
172+
173+
assert result["differenced"] is True
174+
175+
176+
def test_fit_ar_model_coefficient_shape():
177+
"""Test if the number of coefficients matches AR order + intercept."""
178+
rng = np.random.default_rng(42)
179+
df = pd.DataFrame(
180+
{"close_price": np.cos(np.linspace(0, 10, 100)) + rng.normal(0, 0.1, 100)}
181+
)
182+
183+
p = 3
184+
result = fit_ar_model(df, column="close_price", p=p)
185+
186+
assert result["coefficients"].shape[0] == p + 1
187+
188+
189+
def test_fit_ar_model_p_value():
190+
"""Test if p-value is included in the result dictionary."""
191+
rng = np.random.default_rng(42)
192+
df = pd.DataFrame(
193+
{"close_price": np.exp(np.linspace(0, 2, 100)) + rng.normal(0, 0.1, 100)}
194+
)
195+
196+
result = fit_ar_model(df, column="close_price", p=2)
197+
198+
assert isinstance(result["p_value"], float)

0 commit comments

Comments
 (0)