|
1 | 1 | import csv |
2 | 2 | import os |
3 | | -from typing import Any, Iterable, Iterator, Sequence, Union |
| 3 | +from typing import Any, Dict, Iterable, Iterator, List, Optional, Sequence, Union |
4 | 4 | import pandas as pd |
5 | 5 | from pandas._typing import FilePath, ReadCsvBuffer |
6 | 6 | import pyfastx |
@@ -28,39 +28,36 @@ class InvalidDelimiter(Exception): |
28 | 28 |
|
29 | 29 |
|
30 | 30 | def read_metadata( |
31 | | - metadata_file, |
32 | | - delimiters=DEFAULT_DELIMITERS, |
33 | | - columns=None, |
34 | | - id_columns=DEFAULT_ID_COLUMNS, |
35 | | - keep_id_as_column=False, |
36 | | - chunk_size=None, |
37 | | - dtype=None, |
38 | | - ): |
| 31 | + metadata_file: str, |
| 32 | + delimiters: Sequence[str] = DEFAULT_DELIMITERS, |
| 33 | + columns: Optional[List[str]] = None, |
| 34 | + id_columns: Sequence[str] = DEFAULT_ID_COLUMNS, |
| 35 | + keep_id_as_column: bool = False, |
| 36 | + chunk_size: Optional[int] = None, |
| 37 | + dtype: Optional[Union[Dict[str, Any], str]] = None, |
| 38 | + ) -> Union[pd.DataFrame, Iterator[pd.DataFrame]]: |
39 | 39 | r"""Read metadata from a given filename and into a pandas `DataFrame` or |
40 | | - `TextFileReader` object. |
| 40 | + iterator of DataFrames when `chunk_size` is specified. |
41 | 41 |
|
42 | 42 | Parameters |
43 | 43 | ---------- |
44 | | - metadata_file : str |
| 44 | + metadata_file |
45 | 45 | Path to a metadata file to load. |
46 | | - delimiters : list of str |
| 46 | + delimiters |
47 | 47 | List of possible delimiters to check for between columns in the metadata. |
48 | 48 | Only one delimiter will be inferred. |
49 | | - columns : list of str |
| 49 | + columns |
50 | 50 | List of columns to read. If unspecified, read all columns. |
51 | | - id_columns : list of str |
| 51 | + id_columns |
52 | 52 | List of possible id column names to check for, ordered by priority. |
53 | 53 | Only one id column will be inferred. |
54 | | - keep_id_as_column : bool |
| 54 | + keep_id_as_column |
55 | 55 | If true, keep the resolved id column as a column in addition to setting it as the DataFrame index. |
56 | | - chunk_size : int |
| 56 | + chunk_size |
57 | 57 | Size of chunks to stream from disk with an iterator instead of loading the entire input file into memory. |
58 | | - dtype : dict or str |
| 58 | + dtype |
59 | 59 | Data types to apply to columns in metadata. If unspecified, pandas data type inference will be used. |
60 | 60 | See documentation for an argument of the same name to `pandas.read_csv()`. |
61 | | - Returns |
62 | | - ------- |
63 | | - pandas.DataFrame or `pandas.io.parsers.TextFileReader` |
64 | 61 |
|
65 | 62 | Raises |
66 | 63 | ------ |
|
0 commit comments