|
2 | 2 |
|
3 | 3 |
|
4 | 4 | def _select_and_rename_column(df: pd.DataFrame) -> pd.DataFrame: |
5 | | - """Select the Close column and rename it to close_price.""" |
| 5 | + """Select the 'Close' column and rename it to 'close_price'. |
| 6 | +
|
| 7 | + Args: |
| 8 | + df (pd.DataFrame): Input DataFrame containing a 'Close' column. |
| 9 | +
|
| 10 | + Returns: |
| 11 | + pd.DataFrame: DataFrame with a single column named 'close_price'. |
| 12 | + """ |
6 | 13 | return df[["Close"]].rename(columns={"Close": "close_price"}) |
7 | 14 |
|
8 | 15 |
|
9 | 16 | def _handle_missing_values(df: pd.DataFrame) -> pd.DataFrame: |
10 | | - """Handle missing values by forward and backward filling.""" |
| 17 | + """Fill missing values using forward and backward filling. |
| 18 | +
|
| 19 | + Args: |
| 20 | + df (pd.DataFrame): Input DataFrame that may contain missing values. |
| 21 | +
|
| 22 | + Returns: |
| 23 | + pd.DataFrame: DataFrame with no missing values. |
| 24 | + """ |
11 | 25 | return df.ffill().bfill() |
12 | 26 |
|
13 | 27 |
|
14 | 28 | def _remove_duplicates(df: pd.DataFrame) -> pd.DataFrame: |
15 | | - """Remove duplicate rows based on the index.""" |
| 29 | + """Remove duplicate rows based on the index. |
| 30 | +
|
| 31 | + Args: |
| 32 | + df (pd.DataFrame): Input DataFrame that may contain duplicate index entries. |
| 33 | +
|
| 34 | + Returns: |
| 35 | + pd.DataFrame: DataFrame without duplicate index entries. |
| 36 | + """ |
16 | 37 | return df[~df.index.duplicated(keep="first")] |
17 | 38 |
|
18 | 39 |
|
19 | 40 | def _convert_to_numeric(df: pd.DataFrame) -> pd.DataFrame: |
20 | | - """Convert the entire DataFrame to numeric types, rounding to 2 decimal places.""" |
| 41 | + """Convert all columns in the DataFrame to numeric types. |
| 42 | +
|
| 43 | + Args: |
| 44 | + df (pd.DataFrame): Input DataFrame containing numeric data. |
| 45 | +
|
| 46 | + Returns: |
| 47 | + pd.DataFrame: DataFrame with all values converted to numeric types. |
| 48 | + """ |
21 | 49 | return df.apply(pd.to_numeric, errors="coerce").round(2).astype("float32") |
22 | 50 |
|
23 | 51 |
|
24 | 52 | def _validate_dataframe(df: pd.DataFrame): |
25 | | - """Ensure the DataFrame contains the necessary columns.""" |
| 53 | + """Validate that the DataFrame contains the required columns. |
| 54 | +
|
| 55 | + Args: |
| 56 | + df (pd.DataFrame): Input DataFrame to validate. |
| 57 | +
|
| 58 | + Raises: |
| 59 | + ValueError: If the 'Close' column is not found in the DataFrame. |
| 60 | + """ |
26 | 61 | missing_col_msg = "The DataFrame does not contain a 'Close' column." |
27 | 62 | if "Close" not in df.columns: |
28 | 63 | raise ValueError(missing_col_msg) |
29 | 64 |
|
30 | 65 |
|
31 | 66 | def _convert_to_datetime(df: pd.DataFrame) -> pd.DataFrame: |
32 | | - """Convert the Date column to a datetime index.""" |
| 67 | + """Convert the 'Date' column to a datetime index. |
| 68 | +
|
| 69 | + Args: |
| 70 | + df (pd.DataFrame): Input DataFrame containing a 'Date' column. |
| 71 | +
|
| 72 | + Returns: |
| 73 | + pd.DataFrame: DataFrame with a datetime index. |
| 74 | + """ |
33 | 75 | df["Date"] = pd.to_datetime(df["Date"]) |
34 | 76 | df = df.set_index("Date") |
35 | 77 | return df |
36 | 78 |
|
37 | 79 |
|
38 | 80 | def clean_apple_data(df: pd.DataFrame) -> pd.DataFrame: |
39 | | - """Cleans the raw data and returns the cleaned DataFrame.""" |
| 81 | + """Clean and preprocess raw Apple stock data. |
| 82 | +
|
| 83 | + Args: |
| 84 | + df (pd.DataFrame): Raw input DataFrame. |
| 85 | +
|
| 86 | + Returns: |
| 87 | + pd.DataFrame: Cleaned DataFrame ready for analysis. |
| 88 | +
|
| 89 | + Raises: |
| 90 | + ValueError: If the required 'Close' column is missing in the input DataFrame. |
| 91 | + """ |
40 | 92 | _validate_dataframe(df) |
41 | 93 |
|
42 | 94 | df = _convert_to_datetime(df) |
|
0 commit comments