1
- //! Conversions from kernel types to arrow types
1
+ //! Conversions from kernel schema types to arrow schema types.
2
2
3
3
use std:: sync:: Arc ;
4
4
@@ -19,19 +19,57 @@ pub(crate) const MAP_ROOT_DEFAULT: &str = "key_value";
19
19
pub ( crate ) const MAP_KEY_DEFAULT : & str = "key" ;
20
20
pub ( crate ) const MAP_VALUE_DEFAULT : & str = "value" ;
21
21
22
- impl TryFrom < & StructType > for ArrowSchema {
23
- type Error = ArrowError ;
22
+ /// Convert a kernel type into an arrow type (automatically implemented for all types that
23
+ /// implement [`TryFromKernel`])
24
+ pub trait TryIntoArrow < ArrowType > {
25
+ fn try_into_arrow ( self ) -> Result < ArrowType , ArrowError > ;
26
+ }
24
27
25
- fn try_from ( s : & StructType ) -> Result < Self , ArrowError > {
26
- let fields: Vec < ArrowField > = s. fields ( ) . map ( TryInto :: try_into) . try_collect ( ) ?;
27
- Ok ( ArrowSchema :: new ( fields) )
28
+ /// Convert an arrow type into a kernel type (a similar [`TryIntoKernel`] trait is automatically
29
+ /// implemented for all types that implement [`TryFromArrow`])
30
+ pub trait TryFromArrow < ArrowType > : Sized {
31
+ fn try_from_arrow ( t : ArrowType ) -> Result < Self , ArrowError > ;
32
+ }
33
+
34
+ /// Convert an arrow type into a kernel type (automatically implemented for all types that
35
+ /// implement [`TryFromArrow`])
36
+ pub trait TryIntoKernel < KernelType > {
37
+ fn try_into_kernel ( self ) -> Result < KernelType , ArrowError > ;
38
+ }
39
+
40
+ /// Convert a kernel type into an arrow type (a similar [`TryIntoArrow`] trait is automatically
41
+ /// implemented for all types that implement [`TryFromKernel`])
42
+ pub trait TryFromKernel < KernelType > : Sized {
43
+ fn try_from_kernel ( t : KernelType ) -> Result < Self , ArrowError > ;
44
+ }
45
+
46
+ impl < KernelType , ArrowType > TryIntoArrow < ArrowType > for KernelType
47
+ where
48
+ ArrowType : TryFromKernel < KernelType > ,
49
+ {
50
+ fn try_into_arrow ( self ) -> Result < ArrowType , ArrowError > {
51
+ ArrowType :: try_from_kernel ( self )
28
52
}
29
53
}
30
54
31
- impl TryFrom < & StructField > for ArrowField {
32
- type Error = ArrowError ;
55
+ impl < KernelType , ArrowType > TryIntoKernel < KernelType > for ArrowType
56
+ where
57
+ KernelType : TryFromArrow < ArrowType > ,
58
+ {
59
+ fn try_into_kernel ( self ) -> Result < KernelType , ArrowError > {
60
+ KernelType :: try_from_arrow ( self )
61
+ }
62
+ }
63
+
64
+ impl TryFromKernel < & StructType > for ArrowSchema {
65
+ fn try_from_kernel ( s : & StructType ) -> Result < Self , ArrowError > {
66
+ let fields: Vec < ArrowField > = s. fields ( ) . map ( |f| f. try_into_arrow ( ) ) . try_collect ( ) ?;
67
+ Ok ( ArrowSchema :: new ( fields) )
68
+ }
69
+ }
33
70
34
- fn try_from ( f : & StructField ) -> Result < Self , ArrowError > {
71
+ impl TryFromKernel < & StructField > for ArrowField {
72
+ fn try_from_kernel ( f : & StructField ) -> Result < Self , ArrowError > {
35
73
let metadata = f
36
74
. metadata ( )
37
75
. iter ( )
@@ -42,45 +80,33 @@ impl TryFrom<&StructField> for ArrowField {
42
80
. collect :: < Result < _ , serde_json:: Error > > ( )
43
81
. map_err ( |err| ArrowError :: JsonError ( err. to_string ( ) ) ) ?;
44
82
45
- let field = ArrowField :: new (
46
- f. name ( ) ,
47
- ArrowDataType :: try_from ( f. data_type ( ) ) ?,
48
- f. is_nullable ( ) ,
49
- )
50
- . with_metadata ( metadata) ;
83
+ let field = ArrowField :: new ( f. name ( ) , f. data_type ( ) . try_into_arrow ( ) ?, f. is_nullable ( ) )
84
+ . with_metadata ( metadata) ;
51
85
52
86
Ok ( field)
53
87
}
54
88
}
55
89
56
- impl TryFrom < & ArrayType > for ArrowField {
57
- type Error = ArrowError ;
58
-
59
- fn try_from ( a : & ArrayType ) -> Result < Self , ArrowError > {
90
+ impl TryFromKernel < & ArrayType > for ArrowField {
91
+ fn try_from_kernel ( a : & ArrayType ) -> Result < Self , ArrowError > {
60
92
Ok ( ArrowField :: new (
61
93
LIST_ARRAY_ROOT ,
62
- ArrowDataType :: try_from ( a. element_type ( ) ) ?,
94
+ a. element_type ( ) . try_into_arrow ( ) ?,
63
95
a. contains_null ( ) ,
64
96
) )
65
97
}
66
98
}
67
99
68
- impl TryFrom < & MapType > for ArrowField {
69
- type Error = ArrowError ;
70
-
71
- fn try_from ( a : & MapType ) -> Result < Self , ArrowError > {
100
+ impl TryFromKernel < & MapType > for ArrowField {
101
+ fn try_from_kernel ( a : & MapType ) -> Result < Self , ArrowError > {
72
102
Ok ( ArrowField :: new (
73
103
MAP_ROOT_DEFAULT ,
74
104
ArrowDataType :: Struct (
75
105
vec ! [
76
- ArrowField :: new(
77
- MAP_KEY_DEFAULT ,
78
- ArrowDataType :: try_from( a. key_type( ) ) ?,
79
- false ,
80
- ) ,
106
+ ArrowField :: new( MAP_KEY_DEFAULT , a. key_type( ) . try_into_arrow( ) ?, false ) ,
81
107
ArrowField :: new(
82
108
MAP_VALUE_DEFAULT ,
83
- ArrowDataType :: try_from ( a. value_type( ) ) ?,
109
+ a. value_type( ) . try_into_arrow ( ) ?,
84
110
a. value_contains_null( ) ,
85
111
) ,
86
112
]
@@ -91,10 +117,8 @@ impl TryFrom<&MapType> for ArrowField {
91
117
}
92
118
}
93
119
94
- impl TryFrom < & DataType > for ArrowDataType {
95
- type Error = ArrowError ;
96
-
97
- fn try_from ( t : & DataType ) -> Result < Self , ArrowError > {
120
+ impl TryFromKernel < & DataType > for ArrowDataType {
121
+ fn try_from_kernel ( t : & DataType ) -> Result < Self , ArrowError > {
98
122
match t {
99
123
DataType :: Primitive ( p) => {
100
124
match p {
@@ -128,54 +152,49 @@ impl TryFrom<&DataType> for ArrowDataType {
128
152
}
129
153
DataType :: Struct ( s) => Ok ( ArrowDataType :: Struct (
130
154
s. fields ( )
131
- . map ( TryInto :: try_into )
155
+ . map ( TryIntoArrow :: try_into_arrow )
132
156
. collect :: < Result < Vec < ArrowField > , ArrowError > > ( ) ?
133
157
. into ( ) ,
134
158
) ) ,
135
- DataType :: Array ( a) => Ok ( ArrowDataType :: List ( Arc :: new ( a. as_ref ( ) . try_into ( ) ?) ) ) ,
136
- DataType :: Map ( m) => Ok ( ArrowDataType :: Map ( Arc :: new ( m. as_ref ( ) . try_into ( ) ?) , false ) ) ,
159
+ DataType :: Array ( a) => Ok ( ArrowDataType :: List ( Arc :: new ( a. as_ref ( ) . try_into_arrow ( ) ?) ) ) ,
160
+ DataType :: Map ( m) => Ok ( ArrowDataType :: Map (
161
+ Arc :: new ( m. as_ref ( ) . try_into_arrow ( ) ?) ,
162
+ false ,
163
+ ) ) ,
137
164
}
138
165
}
139
166
}
140
167
141
- impl TryFrom < & ArrowSchema > for StructType {
142
- type Error = ArrowError ;
143
-
144
- fn try_from ( arrow_schema : & ArrowSchema ) -> Result < Self , ArrowError > {
168
+ impl TryFromArrow < & ArrowSchema > for StructType {
169
+ fn try_from_arrow ( arrow_schema : & ArrowSchema ) -> Result < Self , ArrowError > {
145
170
StructType :: try_new (
146
171
arrow_schema
147
172
. fields ( )
148
173
. iter ( )
149
- . map ( |field| field. as_ref ( ) . try_into ( ) ) ,
174
+ . map ( |field| field. as_ref ( ) . try_into_kernel ( ) ) ,
150
175
)
151
176
}
152
177
}
153
178
154
- impl TryFrom < ArrowSchemaRef > for StructType {
155
- type Error = ArrowError ;
156
-
157
- fn try_from ( arrow_schema : ArrowSchemaRef ) -> Result < Self , ArrowError > {
158
- arrow_schema. as_ref ( ) . try_into ( )
179
+ impl TryFromArrow < ArrowSchemaRef > for StructType {
180
+ fn try_from_arrow ( arrow_schema : ArrowSchemaRef ) -> Result < Self , ArrowError > {
181
+ arrow_schema. as_ref ( ) . try_into_kernel ( )
159
182
}
160
183
}
161
184
162
- impl TryFrom < & ArrowField > for StructField {
163
- type Error = ArrowError ;
164
-
165
- fn try_from ( arrow_field : & ArrowField ) -> Result < Self , ArrowError > {
185
+ impl TryFromArrow < & ArrowField > for StructField {
186
+ fn try_from_arrow ( arrow_field : & ArrowField ) -> Result < Self , ArrowError > {
166
187
Ok ( StructField :: new (
167
188
arrow_field. name ( ) . clone ( ) ,
168
- DataType :: try_from ( arrow_field. data_type ( ) ) ?,
189
+ DataType :: try_from_arrow ( arrow_field. data_type ( ) ) ?,
169
190
arrow_field. is_nullable ( ) ,
170
191
)
171
192
. with_metadata ( arrow_field. metadata ( ) . iter ( ) . map ( |( k, v) | ( k. clone ( ) , v) ) ) )
172
193
}
173
194
}
174
195
175
- impl TryFrom < & ArrowDataType > for DataType {
176
- type Error = ArrowError ;
177
-
178
- fn try_from ( arrow_datatype : & ArrowDataType ) -> Result < Self , ArrowError > {
196
+ impl TryFromArrow < & ArrowDataType > for DataType {
197
+ fn try_from_arrow ( arrow_datatype : & ArrowDataType ) -> Result < Self , ArrowError > {
179
198
match arrow_datatype {
180
199
ArrowDataType :: Utf8 => Ok ( DataType :: STRING ) ,
181
200
ArrowDataType :: LargeUtf8 => Ok ( DataType :: STRING ) ,
@@ -212,28 +231,38 @@ impl TryFrom<&ArrowDataType> for DataType {
212
231
{
213
232
Ok ( DataType :: TIMESTAMP )
214
233
}
215
- ArrowDataType :: Struct ( fields) => {
216
- DataType :: try_struct_type ( fields. iter ( ) . map ( |field| field. as_ref ( ) . try_into ( ) ) )
217
- }
218
- ArrowDataType :: List ( field) => {
219
- Ok ( ArrayType :: new ( ( * field) . data_type ( ) . try_into ( ) ?, ( * field) . is_nullable ( ) ) . into ( ) )
220
- }
221
- ArrowDataType :: ListView ( field) => {
222
- Ok ( ArrayType :: new ( ( * field) . data_type ( ) . try_into ( ) ?, ( * field) . is_nullable ( ) ) . into ( ) )
223
- }
224
- ArrowDataType :: LargeList ( field) => {
225
- Ok ( ArrayType :: new ( ( * field) . data_type ( ) . try_into ( ) ?, ( * field) . is_nullable ( ) ) . into ( ) )
226
- }
227
- ArrowDataType :: LargeListView ( field) => {
228
- Ok ( ArrayType :: new ( ( * field) . data_type ( ) . try_into ( ) ?, ( * field) . is_nullable ( ) ) . into ( ) )
229
- }
230
- ArrowDataType :: FixedSizeList ( field, _) => {
231
- Ok ( ArrayType :: new ( ( * field) . data_type ( ) . try_into ( ) ?, ( * field) . is_nullable ( ) ) . into ( ) )
232
- }
234
+ ArrowDataType :: Struct ( fields) => DataType :: try_struct_type (
235
+ fields. iter ( ) . map ( |field| field. as_ref ( ) . try_into_kernel ( ) ) ,
236
+ ) ,
237
+ ArrowDataType :: List ( field) => Ok ( ArrayType :: new (
238
+ ( * field) . data_type ( ) . try_into_kernel ( ) ?,
239
+ ( * field) . is_nullable ( ) ,
240
+ )
241
+ . into ( ) ) ,
242
+ ArrowDataType :: ListView ( field) => Ok ( ArrayType :: new (
243
+ ( * field) . data_type ( ) . try_into_kernel ( ) ?,
244
+ ( * field) . is_nullable ( ) ,
245
+ )
246
+ . into ( ) ) ,
247
+ ArrowDataType :: LargeList ( field) => Ok ( ArrayType :: new (
248
+ ( * field) . data_type ( ) . try_into_kernel ( ) ?,
249
+ ( * field) . is_nullable ( ) ,
250
+ )
251
+ . into ( ) ) ,
252
+ ArrowDataType :: LargeListView ( field) => Ok ( ArrayType :: new (
253
+ ( * field) . data_type ( ) . try_into_kernel ( ) ?,
254
+ ( * field) . is_nullable ( ) ,
255
+ )
256
+ . into ( ) ) ,
257
+ ArrowDataType :: FixedSizeList ( field, _) => Ok ( ArrayType :: new (
258
+ ( * field) . data_type ( ) . try_into_kernel ( ) ?,
259
+ ( * field) . is_nullable ( ) ,
260
+ )
261
+ . into ( ) ) ,
233
262
ArrowDataType :: Map ( field, _) => {
234
263
if let ArrowDataType :: Struct ( struct_fields) = field. data_type ( ) {
235
- let key_type = DataType :: try_from ( struct_fields[ 0 ] . data_type ( ) ) ?;
236
- let value_type = DataType :: try_from ( struct_fields[ 1 ] . data_type ( ) ) ?;
264
+ let key_type = DataType :: try_from_arrow ( struct_fields[ 0 ] . data_type ( ) ) ?;
265
+ let value_type = DataType :: try_from_arrow ( struct_fields[ 1 ] . data_type ( ) ) ?;
237
266
let value_type_nullable = struct_fields[ 1 ] . is_nullable ( ) ;
238
267
Ok ( MapType :: new ( key_type, value_type, value_type_nullable) . into ( ) )
239
268
} else {
@@ -242,7 +271,9 @@ impl TryFrom<&ArrowDataType> for DataType {
242
271
}
243
272
// Dictionary types are just an optimized in-memory representation of an array.
244
273
// Schema-wise, they are the same as the value type.
245
- ArrowDataType :: Dictionary ( _, value_type) => Ok ( value_type. as_ref ( ) . try_into ( ) ?) ,
274
+ ArrowDataType :: Dictionary ( _, value_type) => {
275
+ Ok ( value_type. as_ref ( ) . try_into_kernel ( ) ?)
276
+ }
246
277
s => Err ( ArrowError :: SchemaError ( format ! (
247
278
"Invalid data type for Delta Lake: {s}"
248
279
) ) ) ,
@@ -252,6 +283,7 @@ impl TryFrom<&ArrowDataType> for DataType {
252
283
253
284
#[ cfg( test) ]
254
285
mod tests {
286
+ use super :: * ;
255
287
use crate :: engine:: arrow_conversion:: ArrowField ;
256
288
use crate :: {
257
289
schema:: { DataType , StructField } ,
@@ -265,7 +297,7 @@ mod tests {
265
297
metadata. insert ( "description" , "hello world" . to_owned ( ) ) ;
266
298
let struct_field = StructField :: not_null ( "name" , DataType :: STRING ) . with_metadata ( metadata) ;
267
299
268
- let arrow_field = ArrowField :: try_from ( & struct_field) ?;
300
+ let arrow_field = ArrowField :: try_from_kernel ( & struct_field) ?;
269
301
let new_metadata = arrow_field. metadata ( ) ;
270
302
271
303
assert_eq ! (
0 commit comments