Skip to content

Commit 990ee4e

Browse files
committed
Add docstrings for functions in clean_apple.py.
1 parent 065f330 commit 990ee4e

File tree

1 file changed

+59
-7
lines changed

1 file changed

+59
-7
lines changed

src/lennart_epp/data_management/clean_apple.py

Lines changed: 59 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2,41 +2,93 @@
22

33

44
def _select_and_rename_column(df: pd.DataFrame) -> pd.DataFrame:
    """Select the 'Close' column and rename it to 'close_price'.

    All other columns are dropped; the index is carried over unchanged and the
    input DataFrame is not modified.

    Args:
        df (pd.DataFrame): Input DataFrame containing a 'Close' column.

    Returns:
        pd.DataFrame: DataFrame with a single column named 'close_price'.
    """
    # Double brackets keep the selection a DataFrame rather than a Series.
    return df[["Close"]].rename(columns={"Close": "close_price"})
714

815

916
def _handle_missing_values(df: pd.DataFrame) -> pd.DataFrame:
    """Fill missing values using forward and then backward filling.

    Forward filling propagates the last valid observation downwards; the
    subsequent backward fill covers gaps at the start of a column that have no
    earlier value to copy from. A column that contains no valid value at all
    stays entirely missing.

    Args:
        df (pd.DataFrame): Input DataFrame that may contain missing values.

    Returns:
        pd.DataFrame: New DataFrame in which every gap that has at least one
            valid value in the same column has been filled.
    """
    return df.ffill().bfill()
1226

1327

1428
def _remove_duplicates(df: pd.DataFrame) -> pd.DataFrame:
    """Remove rows whose index label has already appeared.

    Only the index is compared, not the row contents; for each repeated label
    the first occurrence is kept and later ones are dropped.

    Args:
        df (pd.DataFrame): Input DataFrame that may contain duplicate index entries.

    Returns:
        pd.DataFrame: DataFrame without duplicate index entries.
    """
    is_repeat = df.index.duplicated(keep="first")
    return df[~is_repeat]
1738

1839

1940
def _convert_to_numeric(df: pd.DataFrame) -> pd.DataFrame:
    """Convert all columns to numeric, round to 2 decimals and cast to float32.

    Values that cannot be parsed as numbers are turned into NaN rather than
    raising, because the conversion runs with ``errors="coerce"``.

    Args:
        df (pd.DataFrame): Input DataFrame whose columns should hold numeric data.

    Returns:
        pd.DataFrame: DataFrame with every column as ``float32``, rounded to
            two decimal places, with unparseable entries set to NaN.
    """
    numeric = df.apply(pd.to_numeric, errors="coerce")
    return numeric.round(2).astype("float32")
2250

2351

2452
def _validate_dataframe(df: pd.DataFrame) -> None:
    """Validate that the DataFrame contains the required columns.

    Only the presence of the 'Close' column is checked here.

    Args:
        df (pd.DataFrame): Input DataFrame to validate.

    Raises:
        ValueError: If the 'Close' column is not found in the DataFrame.
    """
    if "Close" not in df.columns:
        missing_col_msg = "The DataFrame does not contain a 'Close' column."
        raise ValueError(missing_col_msg)
2964

3065

3166
def _convert_to_datetime(df: pd.DataFrame) -> pd.DataFrame:
    """Convert the 'Date' column to a datetime index.

    The caller's DataFrame is left untouched: the parsed dates are written to
    a copy, which is then re-indexed by 'Date'.

    Args:
        df (pd.DataFrame): Input DataFrame containing a 'Date' column.

    Returns:
        pd.DataFrame: New DataFrame indexed by the parsed 'Date' values, with
            the 'Date' column removed from the regular columns.
    """
    # assign() returns a new frame, so the input's 'Date' column is not
    # overwritten as a side effect of cleaning.
    parsed = df.assign(Date=pd.to_datetime(df["Date"]))
    return parsed.set_index("Date")
3678

3779

3880
def clean_apple_data(df: pd.DataFrame) -> pd.DataFrame:
39-
"""Cleans the raw data and returns the cleaned DataFrame."""
81+
"""Clean and preprocess raw Apple stock data.
82+
83+
Args:
84+
df (pd.DataFrame): Raw input DataFrame.
85+
86+
Returns:
87+
pd.DataFrame: Cleaned DataFrame ready for analysis.
88+
89+
Raises:
90+
ValueError: If the required 'Close' column is missing in the input DataFrame.
91+
"""
4092
_validate_dataframe(df)
4193

4294
df = _convert_to_datetime(df)

0 commit comments

Comments
 (0)