Skip to content

Commit 5198f5b

Browse files
committed
Add test file for data management.
1 parent 1f3dc1b commit 5198f5b

File tree

1 file changed

+78
-0
lines changed

1 file changed

+78
-0
lines changed
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import numpy as np
2+
import pandas as pd
3+
import pytest
4+
5+
from lennart_epp.data_management.clean_apple import (
6+
_convert_to_datetime,
7+
_convert_to_numeric,
8+
_handle_missing_values,
9+
_remove_duplicates,
10+
_select_and_rename_column,
11+
_validate_dataframe,
12+
)
13+
14+
15+
@pytest.fixture
def raw_data():
    """Provide a small raw price table for the cleaning-step tests.

    The frame has five rows and three columns:

    * ``Date``  - ISO date strings; ``2022-01-03`` appears twice.
    * ``Close`` - numeric values with one ``NaN`` at position 1.
    * ``Other`` - string labels that are not price data.
    """
    dates = [
        "2022-01-01",
        "2022-01-02",
        "2022-01-03",
        "2022-01-03",
        "2022-01-04",
    ]
    closing_prices = [100, np.nan, 102, 102, 105]
    labels = ["A", "B", "C", "C", "D"]
    return pd.DataFrame(
        {
            "Date": dates,
            "Close": closing_prices,
            "Other": labels,
        }
    )
29+
30+
31+
def test_select_and_rename_column(raw_data):
    """Check that only a ``close_price`` column remains, holding the ``Close`` data."""
    selected = _select_and_rename_column(raw_data)

    # Exactly one column is expected, carrying the new name.
    column_names = list(selected.columns)
    assert column_names == ["close_price"]

    # Values must be unchanged; only the series name is allowed to differ.
    actual_series = selected["close_price"]
    expected_series = raw_data["Close"]
    pd.testing.assert_series_equal(
        actual_series,
        expected_series,
        check_names=False,
    )
37+
38+
39+
# Value the test expects at row 1 of "Close" once the NaN has been filled.
# NOTE(review): this equals the preceding row's value, so the helper presumably
# forward-fills - confirm against the implementation.
expected_value_missing = 100


def test_handle_missing_values(raw_data):
    """Check that no missing values remain and the gap in ``Close`` is filled."""
    cleaned = _handle_missing_values(raw_data)

    # Count NaNs per column, then across columns.
    total_missing = cleaned.isna().sum().sum()
    assert total_missing == 0

    filled_value = cleaned.loc[1, "Close"]
    assert filled_value == expected_value_missing
46+
47+
48+
# The fixture has five rows with one repeated date, so four rows should remain.
expected_length_index = 4


def test_remove_duplicates(raw_data):
    """Check that duplicated index entries are dropped from a date-indexed frame."""
    indexed = raw_data.copy().set_index("Date")

    deduplicated = _remove_duplicates(indexed)

    # No index label may occur more than once afterwards.
    duplicate_flags = deduplicated.index.duplicated()
    assert not duplicate_flags.any()
    assert len(deduplicated) == expected_length_index
56+
57+
58+
def test_convert_to_numeric(raw_data):
    """Check that string-typed columns come back as ``float32``.

    Every column of the fixture is first cast to ``str`` so the helper has to
    perform the conversion itself.
    """
    as_strings = raw_data.copy().astype(str)

    converted = _convert_to_numeric(as_strings)

    # Each column of the result must carry the float32 dtype.
    for _name, column_dtype in converted.dtypes.items():
        assert column_dtype == "float32"

    first_close = converted.loc[0, "Close"]
    np.testing.assert_almost_equal(first_close, 100.0, decimal=2)
64+
65+
66+
def test_validate_dataframe(raw_data):
    """Check that validation accepts a frame with ``Close`` and rejects one without.

    Raises (expected from the helper under test):
        ValueError: when the ``Close`` column is absent.
    """
    # A frame that contains the "Close" column must pass without raising.
    _validate_dataframe(raw_data)

    df_missing = raw_data.drop(columns=["Close"])

    # ``match`` is interpreted as a regular expression, so the trailing period
    # is escaped to require a literal "." rather than any character.
    with pytest.raises(
        ValueError, match=r"The DataFrame does not contain a 'Close' column\."
    ):
        _validate_dataframe(df_missing)
73+
74+
75+
def test_convert_to_datetime(raw_data):
    """Check that the result has a ``DatetimeIndex`` starting at 2022-01-01."""
    frame_copy = raw_data.copy()

    converted = _convert_to_datetime(frame_copy)

    resulting_index = converted.index
    assert isinstance(resulting_index, pd.DatetimeIndex)

    first_timestamp = resulting_index[0]
    assert first_timestamp == pd.Timestamp("2022-01-01")

0 commit comments

Comments
 (0)