@@ -10,52 +10,138 @@ from pylibcudf.libcudf.io.avro cimport (
10
10
)
11
11
from pylibcudf.libcudf.types cimport size_type
12
12
13
- __all__ = [" read_avro" ]
13
+ __all__ = [" read_avro" , " AvroReaderOptions" , " AvroReaderOptionsBuilder" ]
14
+
15
+
16
+ cdef class AvroReaderOptions:
17
+ """
18
+ The settings to use for ``read_avro``
19
+ For details, see :cpp:class:`cudf::io::avro_reader_options`
20
+ """
21
+ @staticmethod
22
+ def builder (SourceInfo source ):
23
+ """
24
+ Create an AvroReaderOptionsBuilder object
25
+
26
+ For details, see :cpp:func:`cudf::io::avro_reader_options::builder`
27
+
28
+ Parameters
29
+ ----------
30
+ source : SourceInfo
31
+ The source to read the Avro file from.
32
+
33
+ Returns
34
+ -------
35
+ AvroReaderOptionsBuilder
36
+ Builder to build AvroReaderOptions
37
+ """
38
+ cdef AvroReaderOptionsBuilder avro_builder = AvroReaderOptionsBuilder.__new__ (
39
+ AvroReaderOptionsBuilder
40
+ )
41
+ avro_builder.c_obj = avro_reader_options.builder(source.c_obj)
42
+ avro_builder.source = source
43
+ return avro_builder
44
+
45
+ cpdef void set_columns(self , list col_names):
46
+ """
47
+ Set names of the columns to be read.
48
+
49
+ Parameters
50
+ ----------
51
+ col_names : list[str]
52
+ List of column names
53
+
54
+ Returns
55
+ -------
56
+ None
57
+ """
58
+ cdef vector[string] vec
59
+ vec.reserve(len (col_names))
60
+ for name in col_names:
61
+ vec.push_back(str (name).encode())
62
+ self .c_obj.set_columns(vec)
63
+
64
+
65
+ cdef class AvroReaderOptionsBuilder:
66
+ cpdef AvroReaderOptionsBuilder columns(self , list col_names):
67
+ """
68
+ Set names of the columns to be read.
69
+
70
+ Parameters
71
+ ----------
72
+ col_names : list
73
+ List of column names
74
+
75
+ Returns
76
+ -------
77
+ AvroReaderOptionsBuilder
78
+ """
79
+ cdef vector[string] vec
80
+ vec.reserve(len (col_names))
81
+ for name in col_names:
82
+ vec.push_back(str (name).encode())
83
+ self .c_obj.columns(vec)
84
+ return self
85
+
86
+ cpdef AvroReaderOptionsBuilder skip_rows(self , size_type skip_rows):
87
+ """
88
+ Sets number of rows to skip.
89
+
90
+ Parameters
91
+ ----------
92
+ skip_rows : size_type
93
+ Number of rows to skip from start
94
+
95
+ Returns
96
+ -------
97
+ AvroReaderOptionsBuilder
98
+ """
99
+ self .c_obj.skip_rows(skip_rows)
100
+ return self
101
+
102
+ cpdef AvroReaderOptionsBuilder num_rows(self , size_type num_rows):
103
+ """
104
+ Sets number of rows to read.
105
+
106
+ Parameters
107
+ ----------
108
+ num_rows : size_type
109
+ Number of rows to read after skip
110
+
111
+ Returns
112
+ -------
113
+ AvroReaderOptionsBuilder
114
+ """
115
+ self .c_obj.num_rows(num_rows)
116
+ return self
117
+
118
+ cpdef AvroReaderOptions build(self ):
119
+ """ Create an AvroReaderOptions object"""
120
+ cdef AvroReaderOptions avro_options = AvroReaderOptions.__new__ (
121
+ AvroReaderOptions
122
+ )
123
+ avro_options.c_obj = move(self .c_obj.build())
124
+ avro_options.source = self .source
125
+ return avro_options
14
126
15
127
16
128
cpdef TableWithMetadata read_avro(
17
- SourceInfo source_info,
18
- list columns = None ,
19
- size_type skip_rows = 0 ,
20
- size_type num_rows = - 1
129
+ AvroReaderOptions options
21
130
):
22
131
"""
23
- Reads an Avro dataset into a :py:class:`~.types.TableWithMetadata`.
132
+ Read from Avro format.
133
+
134
+ The source to read from and options are encapsulated
135
+ by the ``options`` object.
24
136
25
137
For details, see :cpp:func:`read_avro`.
26
138
27
139
Parameters
28
140
----------
29
- source_info: SourceInfo
30
- The SourceInfo object to read the avro dataset from.
31
- columns: list, default None
32
- Optional columns to read, if not provided, reads all columns in the file.
33
- skip_rows: size_type, default 0
34
- The number of rows to skip.
35
- num_rows: size_type, default -1
36
- The number of rows to read, after skipping rows.
37
- If -1 is passed, all rows will be read.
38
-
39
- Returns
40
- -------
41
- TableWithMetadata
42
- The Table and its corresponding metadata (column names) that were read in.
141
+ options : AvroReaderOptions
142
+ Settings for controlling reading behavior
43
143
"""
44
- cdef vector[string] c_columns
45
- if columns is not None and len (columns) > 0 :
46
- c_columns.reserve(len (columns))
47
- for col in columns:
48
- c_columns.push_back(str (col).encode())
49
-
50
- cdef avro_reader_options avro_opts = (
51
- avro_reader_options.builder(source_info.c_obj)
52
- .columns(c_columns)
53
- .skip_rows(skip_rows)
54
- .num_rows(num_rows)
55
- .build()
56
- )
57
-
58
144
with nogil:
59
- c_result = move(cpp_read_avro(avro_opts ))
145
+ c_result = move(cpp_read_avro(options.c_obj ))
60
146
61
147
return TableWithMetadata.from_libcudf(c_result)
0 commit comments